Пример #1
0
    def _notify_on_past_due_scheduled_backups(self):
        """
            Alert on backups that have stayed in the SCHEDULED state for
            too long.

            Every SCHEDULED backup is tested with self.is_backup_past_due().
            When at least one qualifies, a single CRITICAL event notification
            listing all of them is sent and the past-due alert flag is raised.
            When none qualifies and an alert was raised earlier, the alert is
            cleared through self._clear_past_due_alert().
        """
        scheduled_query = {"state": State.SCHEDULED}
        scheduled_backups = get_mbs().backup_collection.find_iter(
            scheduled_query, no_cursor_timeout=True)

        # one "<id> (<source info>)" entry per past-due backup
        overdue_infos = [
            "%s (%s)" % (str(candidate.id),
                         candidate.source.get_source_info())
            for candidate in scheduled_backups
            if self.is_backup_past_due(candidate)
        ]

        if not overdue_infos:
            # nothing is past due any more: clear an alert raised earlier
            if self._alerting_on_past_due:
                self._clear_past_due_alert()
            return

        msg = ("Backup(s) in SCHEDULED for too long: \n%s" %
               ", \n".join(overdue_infos))
        logger.info(msg)
        logger.info("Sending a notification...")
        sbj = PAST_DUE_ALERT_SUBJECT
        get_mbs().notifications.send_notification(
            sbj,
            msg,
            notification_type=NotificationType.EVENT,
            priority=NotificationPriority.CRITICAL)
        self._alerting_on_past_due = True
Пример #2
0
    def run_generator(self, dry_run=False):
        """
        Apply this generator's plan changes to the backup system.

        Plans returned by get_plans_to_remove() are removed first, then the
        plans returned by get_plans_to_save() are saved. With dry_run=True
        nothing is modified; the intended actions are only logged.

        A failure while saving one plan is logged and reported, and the
        remaining plans are still processed. Any other failure ends the run;
        it is logged and reported but not re-raised.
        """
        try:
            if dry_run:
                logger.info("----- DRY RUN ------")

            logger.info("Running plan generator '%s' " % self.name)

            # remove expired plans
            for stale_plan in self.get_plans_to_remove():
                if dry_run:
                    logger.info("DRY RUN: remove plan '%s' " % stale_plan.id)
                else:
                    self._backup_system.remove_plan(stale_plan.id)

            # save new plans; one bad plan must not stop the others
            for new_plan in self.get_plans_to_save():
                try:
                    if dry_run:
                        logger.info("DRY RUN: save plan: %s" % new_plan)
                    else:
                        self._backup_system.save_plan(new_plan)
                except Exception as save_error:
                    logger.exception("Error while saving plan %s" % new_plan)

                    get_mbs().notifications.send_event_notification(
                        "Error in saving plan for generator %s" % self.name,
                        str(save_error),
                        priority=NotificationPriority.CRITICAL)
        except Exception as run_error:
            logger.exception("Error in running plan generator %s" % self.name)

            get_mbs().notifications.send_event_notification(
                "Error in running plan generator %s" % self.name,
                str(run_error),
                priority=NotificationPriority.CRITICAL)
Пример #3
0
    def worker_fail(self, exception, trace=None):
        """
        Record a task failure and finish the worker in the FAILED state.

        An ERROR event is written on the task (the exception's own message
        for MBSError, a generic message otherwise), the task's retry info is
        updated and the worker is finished with State.FAILED. A failure
        notification is sent only when no event queue is configured and the
        task has exceeded its max tries.
        """
        log_msg = (exception.message if isinstance(exception, MBSError)
                   else "Unexpected error. Please contact admin")
        details = safe_stringify(exception)
        task = self._task

        self.get_task_collection().update_task(
            task,
            event_type=EventType.ERROR,
            message=log_msg,
            details=details,
            error_code=to_mbs_error_code(exception))

        # update retry info
        set_task_retry_info(task, exception)

        self.worker_finished(State.FAILED)

        # With an event queue configured the failure is expected to be
        # handled by the backup event listener, and a task that can still be
        # retried does not warrant a notification yet.
        if get_mbs().event_queue or not task.exceeded_max_tries():
            return

        get_mbs().notifications.notify_on_task_failure(task, exception, trace)
Пример #4
0
    def run_generator(self, dry_run=False):
        """
        Run the plan generator: remove outdated plans, then save new ones.

        When dry_run is true the backup system is left untouched and each
        action that would have been taken is logged instead.

        Errors never propagate to the caller. A plan that fails to save is
        logged and reported with a CRITICAL event notification, then the loop
        moves on to the next plan. Any other error is logged and reported the
        same way and ends the run.
        """
        try:
            if dry_run:
                logger.info("----- DRY RUN ------")

            logger.info("Running plan generator '%s' " % self.name)

            # remove expired plans
            for plan_to_remove in self.get_plans_to_remove():
                if dry_run:
                    logger.info("DRY RUN: remove plan '%s' " %
                                plan_to_remove.id)
                else:
                    self._backup_system.remove_plan(plan_to_remove.id)

            # save new plans
            for plan_to_save in self.get_plans_to_save():
                try:
                    if dry_run:
                        logger.info("DRY RUN: save plan: %s" % plan_to_save)
                    else:
                        self._backup_system.save_plan(plan_to_save)
                except Exception as ex:
                    logger.exception("Error while saving plan %s" %
                                     plan_to_save)

                    get_mbs().notifications.send_event_notification(
                        "Error in saving plan for generator %s" % self.name,
                        str(ex),
                        priority=NotificationPriority.CRITICAL)
        except Exception as ex:
            logger.exception("Error in running plan generator %s" % self.name)

            get_mbs().notifications.send_event_notification(
                "Error in running plan generator %s" % self.name,
                str(ex),
                priority=NotificationPriority.CRITICAL)
    def save_plan(self, plan):
        """
        Validate the plan, then insert it (no id yet) or update it (has id).

        The created date is filled in when missing. A newly inserted plan
        gets its id from the saved document.

        Raises BackupSystemError for any failure. This includes validation
        errors, which are raised inside the try block and therefore wrapped
        again by the generic "Error while saving plan" message.
        """
        try:
            self.debug("Validating plan %s" % plan)
            validation_errors = plan.validate()
            if validation_errors:
                raise BackupSystemError(
                    "Plan %s is invalid."
                    "Please correct the following errors and then try"
                    " saving again.\n%s" % (plan, validation_errors))

            # set plan created date if its not set
            if not plan.created_date:
                plan.created_date = date_now()

            if plan.id:
                # existing plan
                self.info("Updating plan: \n%s" % plan)
                self.update_existing_plan(plan)
                self.info("Plan updated successfully")
            else:
                # new plan: save it and pick up the generated id
                self.info("Saving new plan: \n%s" % plan)
                new_doc = plan.to_document()
                get_mbs().plan_collection.save_document(new_doc)
                plan.id = new_doc["_id"]
                self.info("Plan saved successfully")
        except Exception as e:
            raise BackupSystemError("Error while saving plan %s. %s" %
                                    (plan, e))
Пример #6
0
    def _send_audit_report(self, auditor, report):
        """
        Send the report's summary as a notification of type "audit".

        The subject is built from the auditor's name and the report's audit
        date.
        """
        subject_parts = (auditor.name, datetime_to_string(report.audit_date))
        subject = "%s Audit Report for %s" % subject_parts
        body = report.summary()

        get_mbs().notifications.send_notification(
            subject, body, notification_type="audit")
    def schedule_backup_restore(self, backup_id, destination_uri, tags=None,
                                no_index_restore=None,
                                no_users_restore=None,
                                no_roles_restore=None,
                                source_database_name=None):
        """
        Prepare a SCHEDULED Restore of the given backup into destination_uri.

        The restore uses the backup's strategy object with the three
        no_*_restore flags set on it, and the given tags, falling back to the
        source backup's tags. Task tags are then resolved; a failure there is
        handed to self._task_failed_to_schedule().

        NOTE(review): nothing in the visible lines saves or returns the
        restore - confirm against the full method.
        """
        source_backup = get_mbs().backup_collection.get_by_id(backup_id)
        restore_destination = build_backup_source(destination_uri)
        logger.info("Scheduling a restore for backup '%s'" % source_backup.id)

        restore = Restore()
        restore.state = State.SCHEDULED
        restore.source_backup = source_backup
        restore.source_database_name = source_database_name
        # NOTE(review): this is the backup's own strategy object, so the flag
        # assignments below also change source_backup.strategy.
        restore.strategy = source_backup.strategy
        restore.strategy.no_index_restore = no_index_restore
        restore.strategy.no_users_restore = no_users_restore
        restore.strategy.no_roles_restore = no_roles_restore
        restore.destination = restore_destination
        # resolve tags: explicit tags win over the source backup's tags
        restore.tags = tags or restore.source_backup.tags

        restore_collection = get_mbs().restore_collection
        try:
            self._resolve_task_tags(restore)
        except Exception as ex:
            self._task_failed_to_schedule(restore, restore_collection, ex)
 def tick(self):
     """
     Run one master-monitoring pass.

     Any exception is logged and reported as a CRITICAL event notification;
     it is not re-raised.
     """
     try:
         self._master.monitor_master()
     except Exception as monitor_error:
         logger.exception("MbsMasterMonitor error")
         get_mbs().notifications.send_event_notification(
             "MbsMasterMonitor error",
             str(monitor_error),
             priority=NotificationPriority.CRITICAL)
 def _save_plan_next_occurrence(self, plan):
     """
     Write plan.next_occurrence to the "nextOccurrence" field of the plan's
     document, selected by the plan's id.
     """
     selector = {"_id": plan.id}
     change = {"$set": {"nextOccurrence": plan.next_occurrence}}
     get_mbs().plan_collection.update(spec=selector, document=change)
Пример #10
0
    def _send_audit_report(self, auditor, report):
        """
        Notify about a finished audit.

        Sends the report summary with notification type "audit" under a
        subject naming the auditor and the audit date.
        """
        subject = "%s Audit Report for %s" % (
            auditor.name, datetime_to_string(report.audit_date))
        summary_text = report.summary()

        notifications = get_mbs().notifications
        notifications.send_notification(subject, summary_text,
                                        notification_type="audit")
    def schedule_backup(self, **kwargs):
        """
        Build a Backup from keyword arguments, save it and return it.

        Keywords read through get_validate_arg: "strategy", "source" and
        "target" (no required flag passed), and "priority",
        "plan_occurrence", "plan", "secondary_targets" and "tags" (passed
        with required=False).

        The backup is put in the SCHEDULED state, its tags are resolved, and
        its document is saved to the backup collection; the backup id is
        taken from the saved document.
        """

        # NOTE(review): no except/finally clause for this try is visible in
        # this excerpt.
        try:
            backup = Backup()
            backup.created_date = date_now()
            backup.strategy = get_validate_arg(kwargs,
                                               "strategy",
                                               expected_type=BackupStrategy)
            backup.source = get_validate_arg(kwargs, "source", BackupSource)
            backup.target = get_validate_arg(kwargs, "target", BackupTarget)
            backup.priority = get_validate_arg(kwargs,
                                               "priority",
                                               expected_type=(int, long, float,
                                                              complex),
                                               required=False)
            backup.plan_occurrence = \
                get_validate_arg(kwargs, "plan_occurrence",
                                 expected_type=datetime,
                                 required=False)
            backup.plan = get_validate_arg(kwargs,
                                           "plan",
                                           expected_type=BackupPlan,
                                           required=False)

            backup.secondary_targets = get_validate_arg(kwargs,
                                                        "secondary_targets",
                                                        expected_type=list,
                                                        required=False)

            backup.change_state(State.SCHEDULED)
            # set tags
            tags = get_validate_arg(kwargs,
                                    "tags",
                                    expected_type=dict,
                                    required=False)

            backup.tags = tags

            bc = get_mbs().backup_collection
            try:
                # resolve tags

                self._resolve_task_tags(backup)
            except Exception, ex:
                # A tag resolution failure does not abort scheduling: the
                # backup is still saved below.
                # NOTE(review): presumably _task_failed_to_schedule marks the
                # backup FAILED, which the state check below relies on -
                # confirm.
                self._task_failed_to_schedule(backup, bc, ex)

            backup_doc = backup.to_document()
            get_mbs().backup_collection.save_document(backup_doc)
            # set the backup id from the saved doc

            backup.id = backup_doc["_id"]

            self.info("Saved backup \n%s" % backup)

            # a backup that ended up FAILED gets its finished event fired
            # right away
            if backup.state == State.FAILED:
                trigger_task_finished_event(backup, State.FAILED)

            return backup
 def tick(self):
     """
     Run one sweep that deletes backup targets that are due.

     Any exception is logged and reported through an error notification
     carrying the stack trace; it is not re-raised.
     """
     try:
         self._delete_backups_targets_due()
     except Exception:
         logger.exception("BackupSweeper Error")
         stack_trace = traceback.format_exc()
         get_mbs().notifications.send_error_notification(
             "BackupSweeper Error",
             "BackupSweeper Error!.\n\nStack Trace:\n%s" % stack_trace)
    def create_backup_plan(self, **kwargs):
        """
        Create, save and return a new BackupPlan built from kwargs.

        Each keyword is read through get_validate_arg. "description",
        "retention_policy", "priority", "secondary_targets" and "tags" are
        passed with required=False; "strategy", "schedule", "source" and
        "target" are passed without a required flag.

        After saving, the plan id is taken from the saved document and the
        plan is handed to the scheduler to set its next occurrence.

        Any failure is logged with its stack trace and re-raised as
        CreatePlanError carrying the original exception as cause.
        """
        try:
            new_plan = BackupPlan()
            new_plan.created_date = date_now()

            new_plan.description = get_validate_arg(
                kwargs, "description",
                expected_type=(str, unicode),
                required=False)

            new_plan.strategy = get_validate_arg(
                kwargs, "strategy",
                expected_type=BackupStrategy)

            new_plan.schedule = get_validate_arg(
                kwargs, "schedule",
                expected_type=AbstractSchedule)

            new_plan.source = get_validate_arg(
                kwargs, "source",
                expected_type=BackupSource)

            new_plan.target = get_validate_arg(
                kwargs, "target",
                expected_type=BackupTarget)

            new_plan.retention_policy = get_validate_arg(
                kwargs, "retention_policy",
                expected_type=RetentionPolicy,
                required=False)

            new_plan.priority = get_validate_arg(
                kwargs, "priority",
                expected_type=(int, long, float, complex),
                required=False)

            new_plan.secondary_targets = get_validate_arg(
                kwargs, "secondary_targets",
                expected_type=list,
                required=False)

            # tags
            new_plan.tags = get_validate_arg(
                kwargs, "tags",
                expected_type=dict,
                required=False)

            saved_doc = new_plan.to_document()
            get_mbs().plan_collection.save_document(saved_doc)
            # set the backup plan id from the saved doc
            new_plan.id = saved_doc["_id"]

            self.info("Saved backup plan \n%s" % new_plan)
            # process plan to set next occurrence
            self._scheduler._process_plan(new_plan)
            return new_plan
        except Exception as e:
            msg = ("Failed to create plan. Args:\n %s" % dict_to_str(kwargs))
            logger.error(msg)
            logger.error(traceback.format_exc())
            raise CreatePlanError(msg=msg, cause=e)
Пример #14
0
    def daily_audit_report(self, audit_date):
        """
        Build the aggregated plan audit report for audit_date.

        Every plan in the plan collection is audited through
        self._create_plan_audit_report(). The returned
        PlanScheduleAuditReport carries:
            - failed_audits: the per-plan reports that have failures
            - warned_audits: the warned audits of plans that have warnings
              but no failures
            - total_audits / total_failures / total_success / total_warnings
              (total_warnings counts every plan whose report has warnings,
              including plans that also have failures)

        A CRITICAL event notification is sent when the ratio of failed plans
        is greater than self.max_allowed_failures_percentage. With no plans
        at all the ratio is treated as 0, so an empty report is returned
        without an alert.
        """

        logger.info("PlanScheduleAuditor: Generating %s audit report for '%s'"
                    % (TYPE_PLAN_AUDIT,  datetime_to_string(audit_date)))

        # NOTE(review): computed but not used anywhere below in this method.
        audit_end_date = date_plus_seconds(audit_date, 3600 * 24)
        all_plans_report = PlanScheduleAuditReport()
        all_plans_report.audit_date = audit_date
        all_plans_report.audit_type = TYPE_PLAN_AUDIT

        total_plans = 0
        failed_plan_reports = []
        all_warned_audits = []
        total_warnings = 0
        for plan in get_mbs().plan_collection.find_iter(no_cursor_timeout=True):
            logger.info("PlanScheduleAuditor: Processing plan %s" % plan.id)
            plan_report = self._create_plan_audit_report(plan, audit_date)

            if plan_report.has_failures():
                failed_plan_reports.append(plan_report)
            if plan_report.has_warnings():
                # only append to warned audits if report doesn't have failures
                if not plan_report.has_failures():
                    all_warned_audits.extend(plan_report.warned_audits)

                total_warnings += 1

            total_plans += 1

        total_failures = len(failed_plan_reports)

        if failed_plan_reports:
            all_plans_report.failed_audits = failed_plan_reports
        if all_warned_audits:
            all_plans_report.warned_audits = all_warned_audits

        all_plans_report.total_audits = total_plans
        all_plans_report.total_failures = total_failures
        all_plans_report.total_success = total_plans - total_failures
        all_plans_report.total_warnings = total_warnings

        logger.info("PlanScheduleAuditor: Generated report:\n%s " %
                    all_plans_report)

        # With no plans there is nothing that can fail; use a ratio of 0
        # instead of dividing by zero.
        if total_plans:
            failure_ratio = float(total_failures) / total_plans
        else:
            failure_ratio = 0.0

        # alert if the ratio of failed audits is > max allowed ratio
        if failure_ratio > self.max_allowed_failures_percentage:
            subject = "%s Auditor Failure: Too many failures!!!" % self.name
            msg = "There are %s failures out of %s which is > %s%%" % (total_failures, total_plans,
                                                                       self.max_allowed_failures_percentage * 100)
            logger.error(subject)
            logger.error(msg)
            get_mbs().notifications.send_event_notification(subject, msg, priority=NotificationPriority.CRITICAL)
        else:
            logger.info("NO ALERT for %s Auditor: There are %s failures out of %s which is < %s%%" %
                        (self.name,total_failures, total_plans, self.max_allowed_failures_percentage * 100))

        return all_plans_report
    def _notify_on_past_due_scheduled_backups(self):
        """
            Send a notification when scheduled backups have been waiting for
            too long.

            A backup that has a plan is past due once
            min(MAX_BACKUP_WAIT_TIME, half the plan's schedule frequency)
            has elapsed since its created date.
            A one off backup (no plan) is past due once its created date is
            more than ONE_OFF_BACKUP_MAX_WAIT_TIME before now.
        """
        # server-side JavaScript predicate; all values are in milliseconds
        max_wait_millis = MAX_BACKUP_WAIT_TIME * 1000
        where = (
            "(Math.min(%s, (this.plan.schedule.frequencyInSeconds / 2) * 1000) + "
            "this.createdDate.getTime()) < new Date().getTime()" %
            max_wait_millis)
        one_off_cutoff = date_minus_seconds(date_now(),
                                            ONE_OFF_BACKUP_MAX_WAIT_TIME)

        # backups with plans starving query
        planned_starving = {
            "$and": [{"plan": {"$exists": True}}, {"$where": where}]
        }
        # One off backups (no plan) starving query
        one_off_starving = {
            "$and": [{"plan": {"$exists": False}},
                     {"createdDate": {"$lt": one_off_cutoff}}]
        }
        q = {
            "state": STATE_SCHEDULED,
            "$or": [planned_starving, one_off_starving]
        }

        starving_backups = get_mbs().backup_collection.find(q)
        if not starving_backups:
            return

        msg = ("You have %s scheduled backups that has past the maximum "
               "waiting time (%s seconds)." %
               (len(starving_backups), MAX_BACKUP_WAIT_TIME))
        self.info(msg)

        self.info("Sending a notification...")
        get_mbs().send_notification("Past due scheduled backups", msg)
Пример #16
0
    def tick(self):
        """
        Process up to 100 plans that are due for consideration now.

        Any exception is logged with its stack trace and reported through an
        error notification; it is not re-raised.
        """
        try:
            self._process_plans_considered_now(process_max_count=100)
        except Exception as e:
            stack_trace = traceback.format_exc()
            logger.error("Caught an error: '%s'.\nStack Trace:\n%s" %
                         (e, stack_trace))
            message = "%s.\n\nStack Trace:\n%s" % (e, stack_trace)
            get_mbs().notifications.send_error_notification(
                "Plan Scheduler Error", message)
    def tick(self):
        """
        Process up to 100 plans that are due for consideration now.

        Any exception is logged with its stack trace and reported through an
        error notification; it is not re-raised.
        """
        try:
            self._process_plans_considered_now(process_max_count=100)
        except Exception as e:
            stack_trace = traceback.format_exc()
            logger.error("Caught an error: '%s'.\nStack Trace:\n%s" %
                         (e, stack_trace))
            message = "%s.\n\nStack Trace:\n%s" % (e, stack_trace)
            get_mbs().notifications.send_error_notification(
                "Plan Scheduler Error", message)
Пример #18
0
 def remove_plan(self, plan_id):
     """
     Move the plan with the given id from the plan collection to the
     deleted-plan collection, stamping its deleted date first.

     An unknown id is only logged.
     """
     plan = get_mbs().plan_collection.get_by_id(plan_id)
     if not plan:
         logger.info("No such plan '%s'" % plan_id)
         return

     plan.deleted_date = date_now()

     # keep a copy in the deleted plans before removing the original
     logger.info("Adding plan '%s' to deleted plans" % plan_id)
     archived_doc = plan.to_document()
     get_mbs().deleted_plan_collection.save_document(archived_doc)

     logger.info("Removing plan '%s' from plans" % plan_id)
     get_mbs().plan_collection.remove_by_id(plan_id)
    def schedule_backup(self, **kwargs):
        """
        Build a Backup from keyword arguments, save it and return it.

        Keywords read through get_validate_arg: "strategy", "source" and
        "target" (no required flag passed), and "priority",
        "plan_occurrence", "plan", "secondary_targets" and "tags" (passed
        with required=False).

        The backup is put in the SCHEDULED state, its tags are resolved,
        custom properties are applied through set_custom_backup_props(), and
        its document is saved to the backup collection; the backup id is
        taken from the saved document.
        """

        # NOTE(review): no except/finally clause for this try is visible in
        # this excerpt.
        try:
            backup = Backup()
            backup.created_date = date_now()
            backup.strategy = get_validate_arg(kwargs, "strategy",
                                               expected_type=BackupStrategy)
            backup.source = get_validate_arg(kwargs, "source", BackupSource)
            backup.target = get_validate_arg(kwargs, "target", BackupTarget)
            backup.priority = get_validate_arg(kwargs, "priority",
                                               expected_type=(int, long,
                                                              float, complex),
                                               required=False)
            backup.plan_occurrence = \
                get_validate_arg(kwargs, "plan_occurrence",
                                 expected_type=datetime,
                                 required=False)
            backup.plan = get_validate_arg(kwargs, "plan",
                                           expected_type=BackupPlan,
                                           required=False)

            backup.secondary_targets = get_validate_arg(kwargs,
                                                        "secondary_targets",
                                                        expected_type=list,
                                                        required=False)

            backup.change_state(State.SCHEDULED)
            # set tags
            tags = get_validate_arg(kwargs, "tags", expected_type=dict,
                                    required=False)

            backup.tags = tags

            bc = get_mbs().backup_collection
            try:
                # resolve tags

                self._resolve_task_tags(backup)
            except Exception, ex:
                # A tag resolution failure does not abort scheduling: the
                # backup is still saved below.
                # NOTE(review): presumably _task_failed_to_schedule marks the
                # backup FAILED, which the state check below relies on -
                # confirm.
                self._task_failed_to_schedule(backup, bc, ex)

            self.set_custom_backup_props(backup)

            backup_doc = backup.to_document()
            get_mbs().backup_collection.save_document(backup_doc)
            # set the backup id from the saved doc

            backup.id = backup_doc["_id"]

            self.info("Saved backup \n%s" % backup)

            # a backup that ended up FAILED gets its finished event fired
            # right away
            if backup.state == State.FAILED:
                trigger_task_finished_event(backup, State.FAILED)

            return backup
 def remove_plan(self, plan_id):
     """
     Move the plan with the given id from the plan collection to the
     deleted-plan collection, stamping its deleted date first.

     Returns True when the plan existed and was moved, False when no plan
     with that id was found.
     """
     plan = get_mbs().plan_collection.get_by_id(plan_id)
     if not plan:
         logger.info("No such plan '%s'" % plan_id)
         return False

     plan.deleted_date = date_now()

     # keep a copy in the deleted plans before removing the original
     logger.info("Adding plan '%s' to deleted plans" % plan_id)
     archived_doc = plan.to_document()
     get_mbs().deleted_plan_collection.save_document(archived_doc)

     logger.info("Removing plan '%s' from plans" % plan_id)
     get_mbs().plan_collection.remove_by_id(plan_id)
     return True
Пример #21
0
    def generate_audit_report(self, auditor, date):
        """
        Generate the auditor's daily report for date, save it to the audit
        collection and send it out.

        Any exception is reported through an error notification carrying the
        stack trace; it is not re-raised.
        """
        try:
            report = auditor.daily_audit_report(date)
            logger.info("GlobalAuditor: Saving audit report: \n%s" % report)
            report_doc = report.to_document()
            self._audit_collection.save_document(report_doc)

            # send audit report
            self._send_audit_report(auditor, report)
        except Exception:
            sbj = "Auditor %s Error" % auditor.name
            msg = "Auditor %s Error!.\n\nStack Trace:\n%s" % (
                auditor.name, traceback.format_exc())
            get_mbs().notifications.send_error_notification(sbj, msg)
    def _expire_backups_due(self):
        """
        Run one expiration check cycle over recurring backups.

        Any exception is logged and reported through an error notification
        carrying the stack trace; it is not re-raised.
        """
        logger.info("BackupExpirationManager: START EXPIRATION CHECK CYCLE")

        # expire recurring backups
        try:
            self._expire_due_recurring_backups()
        except Exception:
            logger.exception("BackupExpirationManager error during recurring backups expiration")
            stack_trace = traceback.format_exc()
            get_mbs().notifications.send_error_notification(
                "BackupExpirationManager Error",
                "BackupExpirationManager Error!.\n\nStack Trace:\n%s" %
                stack_trace)
Пример #23
0
    def generate_audit_report(self, auditor, date):
        """
        Produce, save and send the daily audit report of one auditor.

        The report comes from auditor.daily_audit_report(date), its document
        is saved to the audit collection, and it is then sent through
        self._send_audit_report(). A failure at any step is turned into an
        error notification with the stack trace and is not re-raised.
        """
        try:
            daily_report = auditor.daily_audit_report(date)
            logger.info("GlobalAuditor: Saving audit report: \n%s" %
                        daily_report)
            self._audit_collection.save_document(daily_report.to_document())

            # send audit report
            self._send_audit_report(auditor, daily_report)
        except Exception:
            sbj = "Auditor %s Error" % auditor.name
            error_details = (auditor.name, traceback.format_exc())
            msg = "Auditor %s Error!.\n\nStack Trace:\n%s" % error_details
            get_mbs().notifications.send_error_notification(sbj, msg)
Пример #24
0
    def worker_crashed(self, worker):
        """
        Handle a worker that finished with a non-zero exit code.

        An error notification is sent immediately, the error is logged, the
        worker's resources are cleaned up and the worker is failed with an
        EngineWorkerCrashedError.
        """
        task = worker.task

        # page immediately
        subject = "Worker crashed for %s %s!" % (task.type_name, task.id)
        errmsg = ("Worker crash detected! Worker (id %s, pid %s, %s"
                  " id '%s') finished with a non-zero exit code '%s'"
                  % (worker.id, worker.pid, task.type_name, task.id,
                     worker.exit_code))
        crash_error = EngineWorkerCrashedError(errmsg)

        get_mbs().notifications.send_error_notification(subject, errmsg)

        self.error(errmsg)
        self._cleanup_worker_resources(worker)
        worker.worker_fail(crash_error)
    def _get_plans_to_consider_now(self, limit=None):
        """
        Returns an iterator over the plans that the scheduler should process
        at this time, sorted on "priority" and capped by limit.
        Those are:
            1- Plans whose next occurrence is None (it has not been
            calculated yet)

            2- Plans whose next occurrence is now or in the past

        """
        not_calculated_yet = {"nextOccurrence": None}
        already_due = {"nextOccurrence": {"$lte": date_now()}}
        due_query = {"$or": [not_calculated_yet, already_due]}

        # sort by priority
        by_priority = [("priority", 1)]

        return get_mbs().plan_collection.find_iter(
            due_query, sort=by_priority, limit=limit)
    def _cancel_past_cycle_backups(self):
        """
        Cancels backups that are SCHEDULED or FAILED, have no engine guid
        (i.e. were never picked up by an engine) and whose plan's next
        occurrence is now or in the past.
        """
        past_cycle_query = {
            "state": {"$in": [State.SCHEDULED, State.FAILED]},
            "plan.nextOccurrence": {"$lte": date_now()},
            "engineGuid": None
        }

        backup_collection = get_mbs().backup_collection
        for stale_backup in backup_collection.find(past_cycle_query):
            logger.info("Cancelling backup %s" % stale_backup.id)
            stale_backup.state = State.CANCELED
            backup_collection.update_task(
                stale_backup,
                properties="state",
                event_name=EVENT_STATE_CHANGE,
                message="Backup is past due. Canceling...")
    def _expire_due_onetime_backups(self):
        """
        Expire onetime backups (backups without a plan id) that are due.

        Each candidate from the check-to-expire query is either marked as
        never expiring, expired, or left alone. Processing stops early when
        a stop has been requested. A summary of the counts is logged at the
        end.
        """
        # process onetime backups
        logger.info("BackupExpirationManager: Finding all onetime backups "
                    "due for expiration")

        processed_count = 0
        expired_count = 0
        never_expire_count = 0

        # restrict the check-to-expire query to backups that have no plan
        q = _check_to_expire_query()
        q["plan._id"] = {"$exists": False}

        logger.info("BackupExpirationManager: Executing query :\n%s" %
                    document_pretty_string(q))
        candidates = get_mbs().backup_collection.find_iter(
            query=q, no_cursor_timeout=True)

        for candidate in candidates:
            if self.stop_requested:
                break

            processed_count += 1
            if self.is_onetime_backup_not_expirable(candidate):
                mark_backup_never_expire(candidate)
                never_expire_count += 1
            elif self.is_onetime_backup_due_for_expiration(candidate):
                self.expire_backup(candidate)
                expired_count += 1

        logger.info("BackupExpirationManager: Finished processing Onetime"
                    " Backups.\nTotal Expired=%s, Total Don't Expire=%s, "
                    "Total Processed=%s" %
                    (expired_count, never_expire_count, processed_count))
    def process_plan_retention(self, plan):
        """
        Fetch the plan's backups that match the check-to-expire query and
        hand them, together with the plan, to self._process_plan().
        """
        retention_query = _check_to_expire_query()
        retention_query["plan._id"] = plan.id

        backups_of_plan = get_mbs().backup_collection.find(retention_query)
        self._process_plan(plan, backups_of_plan)
    def _get_plans_to_consider_now(self):
        """
        Returns the plans that the scheduler should process at this time.
        Those are:
            1- Plans with no next occurrence (the field is missing or None,
            i.e. it has not been calculated yet)

            2- Plans whose next occurrence is now or in the past

        """
        field = "nextOccurrence"
        now = date_now()

        due_query = {
            "$or": [
                {field: {"$exists": False}},
                {field: None},
                {field: {"$lte": now}},
            ]
        }

        return get_mbs().plan_collection.find(due_query)
    def reschedule_backup(self, backup, from_scratch=False):
        """
            Puts a FAILED backup back into the SCHEDULED state.

            Raises BackupSystemError when the backup is not FAILED, or when
            it belongs to a plan whose next occurrence is now or in the past.

            Tags are regenerated from the plan when there is one. With
            from_scratch set, the backup's logs, try count and engine guid
            are reset (and saved) before the state change is saved.
        """
        if backup.state != STATE_FAILED:
            raise BackupSystemError(
                "Cannot reschedule backup ('%s', '%s'). Rescheduling is "
                "only allowed for backups whose state is '%s'." %
                (backup.id, backup.state, STATE_FAILED))

        if backup.plan and backup.plan.next_occurrence <= date_now():
            raise BackupSystemError(
                "Cannot reschedule backup '%s' because its occurrence is"
                " in the past of the current cycle" % backup.id)

        # NOTE(review): reads backup._id here while backup.id is used above -
        # confirm both refer to the same value.
        self.info("Rescheduling backup %s" % backup._id)
        backup.state = STATE_SCHEDULED
        # regenerate backup tags if backup belongs to a plan
        if backup.plan:
            backup.tags = backup.plan.generate_tags()

        backup_collection = get_mbs().backup_collection
        # if from_scratch is set then clear backup log
        if from_scratch:
            backup.logs = []
            backup.try_count = 0
            backup.engine_guid = None
            reset_props = ["logs", "tryCount", "engineGuid"]
            backup_collection.update_task(backup, properties=reset_props)

        backup_collection.update_task(
            backup,
            properties=["state", "tags"],
            event_name=EVENT_STATE_CHANGE,
            message="Rescheduling")
    def reschedule_restore(self, restore, force=False):
        """
            Puts a FAILED restore back into the SCHEDULED state.

            Raises BackupSystemError when the restore is not FAILED.
            With force set, the restore's logs, try count and engine guid
            are reset and saved in the same update as the state change.
        """
        if restore.state != State.FAILED:
            raise BackupSystemError(
                "Cannot reschedule restore ('%s', '%s'). Rescheduling is "
                "only allowed for restores whose state is '%s'." %
                (restore.id, restore.state, State.FAILED))

        self.info("Rescheduling restore %s" % restore.id)
        updated_props = ["state", "tags"]
        restore.state = State.SCHEDULED

        restore_collection = get_mbs().restore_collection
        # if force is set then clear restore log
        if force:
            restore.logs = []
            restore.try_count = 0
            restore.engine_guid = None
            updated_props += ["logs", "tryCount", "engineGuid"]

        restore_collection.update_task(
            restore,
            properties=updated_props,
            event_name=EVENT_STATE_CHANGE,
            message="Rescheduling")
Пример #32
0
def update_backup(backup, properties=None, event_name=None,
                  event_type=EventType.INFO, message=None, details=None,
                  error_code=None):
    """
        Shortcut for update_task() on the backup collection.

        All arguments are forwarded unchanged; in addition w=1 is passed as
        an extra keyword (presumably the MongoDB write concern -- confirm
        against the collection's update()).
    """
    get_mbs().backup_collection.update_task(
        backup,
        properties=properties,
        event_name=event_name,
        event_type=event_type,
        message=message,
        details=details,
        error_code=error_code,
        w=1)
Пример #33
0
def update_restore(restore, properties=None, event_name=None,
                   event_type=EventType.INFO, message=None, details=None,
                   error_code=None):
    """
        Shortcut for update_task() on the restore collection.

        All arguments are forwarded unchanged; in addition w=1 is passed as
        an extra keyword (presumably the MongoDB write concern -- confirm
        against the collection's update()).
    """
    get_mbs().restore_collection.update_task(
        restore,
        properties=properties,
        event_name=event_name,
        event_type=event_type,
        message=message,
        details=details,
        error_code=error_code,
        w=1)
Пример #34
0
    def update_task(self,
                    task,
                    properties=None,
                    event_name=None,
                    event_type=EventType.INFO,
                    message=None,
                    details=None,
                    error_code=None,
                    **update_kwargs):
        """
            Updates the specified properties of the specified MBSTask object
            and, when an event name or message is given, records an event in
            the task's log.

            :param task: the task to update; its document is matched by _id
            :param properties: a property name or a list of property names
                               whose current values are written with $set
            :param event_name: name of the event to log (optional)
            :param event_type: type of the logged event
            :param message: message of the logged event (optional)
            :param details: details of the logged event (optional)
            :param error_code: error code of the logged event (optional)
            :param update_kwargs: extra keyword arguments passed to
                                  self.update()
            :raises Exception: when there is nothing to update, i.e. neither
                               properties nor an event were given
        """
        q = {"_id": task.id}

        u = {}

        # normalize up front so the "logs" membership test below runs
        # against a list of names and never substring-matches a bare string
        prop_names = listify(properties) if properties else None

        # log the event as needed
        if event_name or message:
            log_entry = task.log_event(name=event_name,
                                       event_type=event_type,
                                       message=message,
                                       details=details,
                                       error_code=error_code)
            # push if "logs" property is not included
            if not (prop_names and "logs" in prop_names):
                u["$push"] = {"logs": log_entry.to_document()}

        # serialize AFTER the event was logged so that a $set of "logs"
        # also carries the entry that was just added
        task_doc = task.to_document()

        # construct $set operator
        if prop_names is not None:
            u["$set"] = dict((prop, task_doc.get(prop))
                             for prop in prop_names)

        # u can only ever hold "$set" and/or "$push"; empty means the caller
        # asked for an update that would change nothing
        if not u:
            import mbs
            import notification.handler
            # no exception is being handled here, so report the call stack
            # (format_exc() would have nothing to show)
            mbs.get_mbs().notifications.send_event_notification(
                "BAD UPDATE",
                "BAD UPDATE for task %s: %s , %s" %
                (task.id, u, "".join(traceback.format_stack())),
                priority=notification.handler.NotificationPriority.CRITICAL)
            raise Exception("BAD UPDATE!!!!!")

        self.update(spec=q, document=u, **update_kwargs)
    def master_instance_run(self):
        """
            Brings up the master instance: makes sure the mbs indexes exist
            and then starts the master services one after the other.
        """
        get_mbs().ensure_mbs_indexes()

        # start order: expiration managers, plan generators, backup monitor,
        # scheduler and finally the master monitor
        startup_sequence = (
            self._start_expiration_managers,
            self._start_plan_generators,
            self._start_backup_monitor,
            self._start_scheduler,
            self._start_master_monitor,
        )
        for start_service in startup_sequence:
            start_service()
    def master_instance_run(self):
        """
            Entry point of the master instance. Ensures the mbs indexes and
            then launches every master service in a fixed order.
        """
        # indexes first, services afterwards
        get_mbs().ensure_mbs_indexes()

        service_starters = (
            self._start_expiration_managers,  # expiration managers
            self._start_plan_generators,      # plan generators
            self._start_backup_monitor,       # backup monitor
            self._start_scheduler,            # the scheduler
            self._start_master_monitor,       # the master monitor
        )
        for starter in service_starters:
            starter()
Пример #37
0
    def _lookup_backup_by_plan_occurrence(self, plan, plan_occurrence):
        """
            Looks up the backup recorded for the given occurrence of a plan.

            Returns whatever find_one() on the backup collection yields for
            a query on the plan's _id and the occurrence.
        """
        occurrence_query = {
            "plan._id": plan._id,
            "planOccurrence": plan_occurrence,
        }
        return get_mbs().backup_collection.find_one(occurrence_query)
    def _process_failed_backups(self):
        """
        Pass every FAILED backup whose nextRetryDate is already in the past
        to _process_failed_backup()
        """
        retry_due_query = {
            "state": State.FAILED,
            "nextRetryDate": {"$lt": date_now()},
        }

        backup_collection = get_mbs().backup_collection
        for failed_backup in backup_collection.find(retry_due_query):
            self._process_failed_backup(failed_backup)
Пример #39
0
    def _process_failed_backups(self):
        """
        Pass every FAILED backup whose nextRetryDate is already in the past
        to _process_failed_backup(). A failure on one backup is logged and
        reported through an error notification; the loop then carries on
        with the next backup.
        """
        retry_due_query = {
            "state": State.FAILED,
            "nextRetryDate": {"$lt": date_now()},
        }

        for failed_backup in get_mbs().backup_collection.find_iter(
                retry_due_query):
            try:
                self._process_failed_backup(failed_backup)
            except Exception as error:
                # keep going: report this backup and move on to the next one
                report = (
                    "Error while rescheduling backup '%s'. Cause: %s.\n\nStack Trace:\n%s"
                    % (failed_backup.id, error, traceback.format_exc()))
                logger.exception(report)
                get_mbs().notifications.send_error_notification(
                    "Plan Scheduler Error", report)
    def reschedule_all_failed_backups(self, from_scratch=False):
        """
            Attempts to reschedule every backup that is in the failed state.

            A backup that cannot be rescheduled is logged and skipped so
            that the remaining backups are still processed.

            :param from_scratch: forwarded to reschedule_backup()
        """
        self.info("Rescheduling all failed backups")

        q = {"state": STATE_FAILED}

        for backup in get_mbs().backup_collection.find(q):
            try:
                self.reschedule_backup(backup, from_scratch=from_scratch)
            except Exception as e:
                # best effort: say WHICH backup failed and keep the stack
                # trace, then continue with the next backup
                logger.exception("Error while rescheduling backup '%s'. "
                                 "Cause: %s" % (backup.id, e))
    def global_auditor(self):
        """
            Returns the GlobalAuditor of this object, creating it on first
            use. A newly created auditor is bound to the mbs audit
            collection and gets every entry of self.auditors registered.
        """
        # already built: hand back the cached instance
        if self._global_auditor:
            return self._global_auditor

        audit_collection = get_mbs().audit_collection
        self._global_auditor = GlobalAuditor(
            audit_collection=audit_collection)

        # register auditors with global auditor
        for registered in (self.auditors or ()):
            self._global_auditor.register_auditor(registered)

        return self._global_auditor
Пример #42
0
    def tick(self):
        """
            Drains the plan queue: each queued plan is handed to the
            scheduler's _process_plan(). A failure while processing a plan
            is logged and reported through an error notification.
        """
        while True:

            try:
                # non-blocking fetch of the next queued plan
                plan = self._plan_queue.get_nowait()
            except Queue.Empty:
                # queue is drained, nothing left to do in this tick
                break
            try:
                self._scheduler._process_plan(plan)
            except Exception, e:
                logger.exception("Error while processing plan '%s'. "
                                 "Cause: %s" % (plan.id, e))

                # notify with the plan id, the cause and the stack trace
                subject = "Plan Scheduler Error"
                message = ("Error while processing plan '%s'. Cause: %s.\n\nStack Trace:\n%s" %
                           (plan.id, e, traceback.format_exc()))
                get_mbs().notifications.send_error_notification(subject, message)
            finally:
Пример #43
0
    def daily_audit_report(self, audit_date):
        """
            Builds the aggregated plan audit report for the 24 hours that
            start at audit_date.

            Every plan of the plan collection is audited, except plans that
            were created after audit_date and whose next occurrence falls
            after the end of that 24 hour window.

            :param audit_date: start of the audited window
            :return: an AuditReport holding the totals, the failed plan
                     reports and the warned audits of plans without failures
        """
        logger.info("PlanAuditor: Generating %s audit report for '%s'" %
                    (TYPE_PLAN_AUDIT, datetime_to_string(audit_date)))

        window_end = date_plus_seconds(audit_date, 3600 * 24)

        summary = AuditReport()
        summary.audit_date = audit_date
        summary.audit_type = TYPE_PLAN_AUDIT

        audited_count = 0
        warned_count = 0
        failed_reports = []
        warned_audits = []

        for plan in get_mbs().plan_collection.find():
            # a plan added after the audit date with no occurrence inside
            # the audited window has nothing to be audited yet
            if (plan.created_date > audit_date and plan.next_occurrence and
                    plan.next_occurrence > window_end):
                logger.info("PlanAuditor: Skipping auditing plan '%s' since"
                            " its created date '%s' is later than audit date "
                            "'%s'" % (plan.id,
                                      datetime_to_string(plan.created_date),
                                      datetime_to_string(audit_date)))
                continue

            plan_report = self._create_plan_audit_report(plan, audit_date)

            if plan_report.has_failures():
                failed_reports.append(plan_report)

            if plan_report.has_warnings():
                # warned audits are listed only for plans without failures,
                # the warning itself is counted either way
                if not plan_report.has_failures():
                    warned_audits.extend(plan_report.warned_audits)
                warned_count += 1

            audited_count += 1

        failure_count = len(failed_reports)

        # the list attributes are only assigned when there is something
        # to report
        if failed_reports:
            summary.failed_audits = failed_reports
        if warned_audits:
            summary.warned_audits = warned_audits

        summary.total_audits = audited_count
        summary.total_failures = failure_count
        summary.total_success = audited_count - failure_count
        summary.total_warnings = warned_count

        logger.info("PlanAuditor: Generated report:\n%s " % summary)

        return summary
    def monitor_master(self):
        """
            Checks that the master services are alive and sends a CRITICAL
            event notification listing every service found down.

            The scheduler is always checked; the expiration manager, the
            backup sweeper and the plan generators only when they are set.
        """
        down = []

        if not self._scheduler.is_alive():
            down.append("Scheduler")

        expiration_manager = self._backup_expiration_manager
        if expiration_manager and not expiration_manager.is_alive():
            down.append("Expiration Manager")

        sweeper = self._backup_sweeper
        if sweeper and not sweeper.is_alive():
            down.append("Backup Sweeper")

        for generator in (self._plan_generators or ()):
            if not generator.is_alive():
                down.append("Plan Generator: '%s'" % generator.name)

        # everything is up: nothing to report
        if not down:
            return

        msg = "Mbs Master has some services down: %s" % "\n".join(down)
        logger.error(msg)
        get_mbs().notifications.send_event_notification(
            "Master Services DOWN!!!!", msg,
            priority=NotificationPriority.CRITICAL)
Пример #45
0
    def _process_failed_backups(self):
        """
        Pass every FAILED backup whose nextRetryDate is already in the past
        to _process_failed_backup(). A failure on one backup is logged and
        reported through an error notification; the loop then carries on
        with the next backup.
        """
        retry_due_query = {"state": State.FAILED,
                           "nextRetryDate": {"$lt": date_now()}}

        backup_collection = get_mbs().backup_collection
        for failed_backup in backup_collection.find_iter(retry_due_query):
            try:
                self._process_failed_backup(failed_backup)
            except Exception as error:
                # keep going: report this backup and move on to the next one
                report = ("Error while rescheduling backup '%s'. Cause: %s.\n\nStack Trace:\n%s" %
                          (failed_backup.id, error, traceback.format_exc()))
                logger.exception(report)
                get_mbs().notifications.send_error_notification(
                    "Plan Scheduler Error", report)
Пример #46
0
    def _process_plan(self, plan, plan_backups):
        """
            Runs expiration for a single plan.

            The plan is re-read from persistence first (falling back to the
            given object). Its backups are then split into expirable and
            non expirable ones; the latter are passed to
            mark_plan_backups_not_expirable() and the former to
            expire_plan_dues(). Any error is logged and reported through an
            error notification instead of being raised.

            :param plan: the plan to process
            :param plan_backups: the backups of that plan
        """
        # running tallies; they stay local, nothing is returned
        kept_count = 0
        expired_count = 0

        logger.info("==== Processing plan '%s' .... " % plan.id)

        # work on the latest stored revision of the plan when there is one
        plan = persistence.get_backup_plan(plan.id) or plan

        try:
            expirable, non_expirable = self.find_plan_expirable_backups(
                plan, plan_backups)

            if non_expirable:
                mark_plan_backups_not_expirable(plan, non_expirable)
                kept_count += len(non_expirable)

            expired_count += self.expire_plan_dues(plan, expirable)
        except Exception:
            logger.exception(
                "BackupExpirationManager Error while processing plan '%s'" %
                plan.id)
            subject = "BackupExpirationManager Error"
            message = ("BackupExpirationManager Error while processing"
                       " plan '%s'\n\nStack Trace:\n%s" %
                       (plan.id, traceback.format_exc()))
            get_mbs().notifications.send_error_notification(subject, message)
Пример #47
0
    def __init__(self, id=None, max_workers=10,
                       temp_dir=None,
                       command_port=8888):
        Thread.__init__(self)
        self._id = id
        self._engine_guid = None
        self._max_workers = int(max_workers)
        self._temp_dir = resolve_path(temp_dir or DEFAULT_BACKUP_TEMP_DIR_ROOT)
        self._command_port = command_port
        self._command_server = EngineCommandServer(self)
        self._tags = None
        self._stopped = False

        # create the backup processor
        bc = get_mbs().backup_collection
        self._backup_processor = TaskQueueProcessor("Backups", bc, self,
                                                    self._max_workers)

        # create the restore processor
        rc = get_mbs().restore_collection
        self._restore_processor = TaskQueueProcessor("Restores", rc, self,
                                                     self._max_workers)