Пример #1
0
    def refresh(self):
        """Rebuild self.plugins from the live SMF service listing.

        Runs svcs(1) over every instance beneath PLUGINBASEFMRI and
        wraps each enabled instance in a Plugin object. Misconfigured
        plugins are reported to stderr and skipped.

        Raises:
            RuntimeError: if the svcs command exits non-zero.
        """
        self.plugins = []
        cmd = [smf.SVCSCMD, "-H", "-o", "state,FMRI", PLUGINBASEFMRI]

        p = subprocess.Popen(cmd,
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             close_fds=True)
        outdata, errdata = p.communicate()
        err = p.wait()
        if err != 0:
            # Release the refresh lock before propagating so callers
            # aren't left holding it after a failure.
            self._refreshLock.release()
            raise RuntimeError('%s failed with exit code %d\n%s'
                               % (str(cmd), err, errdata))
        for line in outdata.rstrip().split('\n'):
            line = line.rstrip().split()
            state = line[0]
            fmri = line[1]

            # Note that the plugins, being dependent on the time-slider
            # service themselves, will typically be in an offline state
            # when enabled. They will transition to an "online" state once
            # time-slider itself comes "online" to satisfy its dependency.
            if state == "online" or state == "offline" or state == "degraded":
                util.debug("Found enabled plugin:\t%s" % (fmri), self.verbose)
                try:
                    plugin = Plugin(fmri, self.verbose)
                    self.plugins.append(plugin)
                except RuntimeError as message:
                    sys.stderr.write("Ignoring misconfigured plugin: %s\n" \
                                     % (fmri))
                    sys.stderr.write("Reason:\n%s\n" % (message))
            else:
                # Bug fix: the original concatenated the literal "%s"
                # format string with fmri ("+" instead of "%"), so the
                # fmri was never interpolated into the message.
                util.debug("Found disabled plugin:\t%s" % (fmri), self.verbose)
Пример #2
0
 def _run_warning_cleanup(self, zpool):
     """Reclaim space on *zpool* after it crosses the warning threshold.

     Daily snapshots are cleaned first; hourly snapshots are only
     touched if the pool is still over the warning level afterwards.
     """
     util.debug("Performing warning level cleanup on %s" % zpool.name,
                self.verbose)
     threshold = self._warningLevel
     self._run_cleanup(zpool, "daily", threshold)
     if zpool.get_capacity() > threshold:
         self._run_cleanup(zpool, "hourly", threshold)
Пример #3
0
    def refresh(self):
        """Re-scan SMF for time-slider plugin instances and rebuild the
        plugin list, keeping only instances that are enabled.

        Raises RuntimeError if the svcs listing command fails.
        """
        self.plugins = []
        cmd = [smf.SVCSCMD, "-H", "-o", "state,FMRI", PLUGINBASEFMRI]

        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                close_fds=True)
        outdata, errdata = proc.communicate()
        err = proc.wait()
        if err != 0:
            self._refreshLock.release()
            raise RuntimeError('%s failed with exit code %d\n%s'
                               % (str(cmd), err, errdata))
        for entry in outdata.rstrip().split('\n'):
            fields = entry.rstrip().split()
            state = fields[0]
            fmri = fields[1]

            # Plugins depend on the time-slider service itself, so an
            # enabled plugin usually sits "offline" until time-slider
            # comes online and satisfies the dependency.
            if state in ("online", "offline", "degraded"):
                util.debug("Found enabled plugin:\t%s" % (fmri), self.verbose)
                try:
                    self.plugins.append(Plugin(fmri, self.verbose))
                except RuntimeError as message:
                    sys.stderr.write("Ignoring misconfigured plugin: %s\n"
                                     % (fmri))
                    sys.stderr.write("Reason:\n%s\n" % (message))
            else:
                util.debug("Found disabled plugin:\t%s" % (fmri), self.verbose)
Пример #4
0
 def get(self, section, option):
     """Look up *option* in *section* of the config file.

     Returns the stored value, or '' when the section or option does
     not exist.
     """
     try:
         result = self.config.get(section, option)
     except (ConfigParser.NoOptionError, ConfigParser.NoSectionError):
         util.debug('CONFIG: NOTFOUND section %s, option %s\n' % (section, option), 1)
         return ''
     util.debug('CONFIG: GET section %s, option %s with value %s\n' % (section, option, result), 1)
     return result
Пример #5
0
 def _run_critical_cleanup(self, zpool):
     """Escalated cleanup once *zpool* exceeds the critical threshold.

     Weekly snapshots are always cleaned; daily and then hourly are
     cleaned only while the pool remains over the critical level.
     """
     util.debug("Performing critical level cleanup on %s" % zpool.name,
                self.verbose)
     threshold = self._criticalLevel
     self._run_cleanup(zpool, "weekly", threshold)
     for sched in ("daily", "hourly"):
         if zpool.get_capacity() > threshold:
             self._run_cleanup(zpool, sched, threshold)
Пример #6
0
 def get(self, section, option):
     """Fetch a config value, returning '' when it is missing.

     A debug trace is emitted for both the found and not-found paths.
     """
     try:
         value = self.config.get(section, option)
         util.debug('CONFIG: GET section %s, option %s with value %s\n'
                    % (section, option, value), 1)
         return value
     except (configparser.NoOptionError, configparser.NoSectionError):
         util.debug('CONFIG: NOTFOUND section %s, option %s\n'
                    % (section, option), 1)
         return ''
Пример #7
0
 def _run_emergency_cleanup(self, zpool):
     """Most aggressive cleanup tier for *zpool*.

     Works through monthly, weekly, daily, hourly and frequent
     snapshot schedules, stopping the escalation as soon as the pool
     drops below the emergency level; custom schedules are destroyed
     only as a last resort.
     """
     util.debug("Performing emergency level cleanup on %s" % zpool.name,
                self.verbose)
     threshold = self._emergencyLevel
     self._run_cleanup(zpool, "monthly", threshold)
     for sched in ("weekly", "daily", "hourly", "frequent"):
         if zpool.get_capacity() > threshold:
             self._run_cleanup(zpool, sched, threshold)
     # Finally, as a last resort, delete custom scheduled snapshots.
     for schedule, i, p, k in self._customSchedules:
         if zpool.get_capacity() < threshold:
             break
         self._run_cleanup(zpool, schedule, threshold)
Пример #8
0
    def _needs_cleanup(self):
        """Return True if any monitored zpool needs a remedial cleanup.

        A cleanup is needed when a pool is over the warning capacity
        level AND actually hosts auto-snapshot sets. Checks are rate
        limited to one per 15 minutes and serialised via
        self._cleanupLock; returns False immediately if a cleanup is
        already in progress or remedial cleanup is disabled.

        Raises:
            RuntimeError: propagated when a pool's capacity can't be
                read (after releasing the lock and setting exitCode).
        """
        if self._remedialCleanup == False:
            # Sys admin has explicitly instructed for remedial cleanups
            # not to be performed.
            return False
        now = int(time.time())
        # Don't run checks any less than 15 minutes apart.
        if self._cleanupLock.acquire(False) == False:
            #Indicates that a cleanup is already running.
            # Non-blocking acquire: don't wait, just report no work.
            return False
        # FIXME - Make the cleanup interval equal to the minimum snapshot interval
        # if custom snapshot schedules are defined and enabled.
        elif ((now - self._lastCleanupCheck) < (_MINUTE * 15)):
            # Checked recently enough; fall through to the common
            # release-and-return-False path below.
            pass
        else:
            for zpool in self._zpools:
                try:
                    if zpool.get_capacity() > self._warningLevel:
                        # Before getting into a panic, determine if the pool
                        # is one we actually take snapshots on, by checking
                        # for one of the "auto-snapshot:<schedule> tags. Not
                        # super fast, but it only happens under exceptional
                        # circumstances of a zpool nearing it's capacity.

                        for sched in self._allSchedules:
                            sets = zpool.list_auto_snapshot_sets(sched[0])
                            if len(sets) > 0:
                                util.debug("%s needs a cleanup" \
                                           % zpool.name, \
                                           self.verbose)
                                # Lock must be released on every exit path.
                                self._cleanupLock.release()
                                return True
                except RuntimeError as message:
                    sys.stderr.write("Error checking zpool capacity of: " + \
                                     zpool.name + "\n")
                    self._cleanupLock.release()
                    self.exitCode = smf.SMF_EXIT_ERR_FATAL
                    # Propogate up to thread's run() mehod.
                    raise RuntimeError(message)
            self._lastCleanupCheck = int(time.time())
        self._cleanupLock.release()
        return False
Пример #9
0
 def _check_snapshots(self):
     """
     Check the schedules and see what the required snapshot is.
     Take one immediately on the first overdue snapshot required.

     Returns the due time of the next pending snapshot (or None when
     nothing further is scheduled).
     """
     # Make sure a refresh() doesn't mess with the schedule while
     # we're reading through it.
     self._refreshLock.acquire()
     next, schedule = self._next_due()
     self._refreshLock.release()
     now = int(time.time())
     while next != None and next <= now:
         label = self._take_snapshots(schedule)
         self._plugin.execute_plugins(schedule, label)
         self._refreshLock.acquire()
         self._update_schedules()
         next, schedule = self._next_due()
         self._refreshLock.release()
         # Bug fix: _next_due() can yield None (the loop condition
         # already guards for it); the original passed it straight to
         # fromtimestamp() and crashed with a TypeError.
         if next != None:
             dt = datetime.datetime.fromtimestamp(next)
             util.debug("Next snapshot is %s due at: %s" % \
                        (schedule, dt.isoformat()), \
                        self.verbose)
     return next
Пример #10
0
    def __init__(self, instanceName, debug=False):
        """Wrap the SMF plugin instance named by instanceName.

        Verifies up front that the configured trigger command exists
        and is executable by others, since SMF defers that check for
        offline services until their dependencies come online.

        Raises:
            RuntimeError: if the trigger command is inaccessible or
                not world-executable.
        """
        self.verbose = debug
        util.debug("Instantiating plugin for:\t%s" % (instanceName), self.verbose)
        self.smfInst = pluginsmf.PluginSMF(instanceName)
        self._proc = None

        # Note that the associated plugin service's start method checks
        # that the command is defined and executable. But SMF doesn't
        # bother to do this for offline services until all dependencies
        # (ie. time-slider) are brought online.
        # So we also check the permissions here.
        command = self.smfInst.get_trigger_command()
        try:
            statinfo = os.stat(command)
            # World-execute bit; 0o1 replaces the Python 2-only "01"
            # octal spelling (same value, parses on Python 2.6+ and 3).
            other_x = (statinfo.st_mode & 0o1)
            if other_x == 0:
                raise RuntimeError('Plugin: %s:\nConfigured trigger command '
                                   'is not executable:\n%s'
                                   % (self.smfInst.instanceName, command))
        except OSError:
            # os.stat failed: command path missing or unreadable.
            raise RuntimeError('Plugin: %s:\nCan not access the configured '
                               'plugin/trigger_command:\n%s'
                               % (self.smfInst.instanceName, command))
Пример #11
0
    def __init__(self, instanceName, debug=False):
        """Initialise the plugin wrapper for the given SMF instance.

        Checks immediately that the instance's configured trigger
        command can be stat'ed and carries the world-execute permission
        bit, raising RuntimeError otherwise.
        """
        self.verbose = debug
        util.debug("Instantiating plugin for:\t%s" % (instanceName), self.verbose)
        self.smfInst = pluginsmf.PluginSMF(instanceName)
        # Child process handle; populated later when the plugin runs.
        self._proc = None

        # Note that the associated plugin service's start method checks
        # that the command is defined and executable. But SMF doesn't 
        # bother to do this for offline services until all dependencies
        # (ie. time-slider) are brought online.
        # So we also check the permissions here.
        command = self.smfInst.get_trigger_command()
        try:
            statinfo = os.stat(command)
            # "01" is a Python 2 octal literal: the world-execute bit.
            other_x = (statinfo.st_mode & 01)
            if other_x == 0:
              raise RuntimeError, 'Plugin: %s:\nConfigured trigger command is not ' \
                                  'executable:\n%s' \
                                  % (self.smfInst.instanceName, command)  
        except OSError:
            # os.stat failed: the command path is missing or unreadable.
            raise RuntimeError, 'Plugin: %s:\nCan not access the configured ' \
                                'plugin/trigger_command:\n%s' \
                                % (self.smfInst.instanceName, command)      
Пример #12
0
    def _perform_cleanup(self):
        """Run tiered snapshot cleanup on every monitored zpool.

        Escalates through the warning -> critical -> emergency levels,
        recording per-pool severity in self._poolstatus (0 = OK up to
        4 = repeated emergency pass). Skips silently when another
        cleanup already holds self._cleanupLock.

        Raises:
            RuntimeError: if a pool's capacity can't be determined
                (also from the _run_<level>_cleanup helpers).
        """
        if self._cleanupLock.acquire(False) == False:
            # Cleanup already running. Skip
            return
        self._destroyedsnaps = []
        for zpool in self._zpools:
            try:
                self._poolstatus[zpool.name] = 0
                capacity = zpool.get_capacity()
                if capacity > self._warningLevel:
                    self._run_warning_cleanup(zpool)
                    self._poolstatus[zpool.name] = 1
                    capacity = zpool.get_capacity()
                if capacity > self._criticalLevel:
                    self._run_critical_cleanup(zpool)
                    self._poolstatus[zpool.name] = 2
                    capacity = zpool.get_capacity()
                if capacity > self._emergencyLevel:
                    self._run_emergency_cleanup(zpool)
                    self._poolstatus[zpool.name] = 3
                    capacity = zpool.get_capacity()
                # Second emergency pass if the first one didn't free
                # enough space.
                if capacity > self._emergencyLevel:
                    self._run_emergency_cleanup(zpool)
                    self._poolstatus[zpool.name] = 4
            # This also catches exceptions thrown from _run_<level>_cleanup()
            # and _run_cleanup() in methods called by _perform_cleanup()
            except RuntimeError as message:
                # Bug fix: original message ran two words together
                # ("determinecapacity").
                sys.stderr.write("Remedial space cleanup failed because " + \
                                 "of failure to determine capacity of: " + \
                                 zpool.name + "\n")
                self.exitCode = smf.SMF_EXIT_ERR_FATAL
                self._cleanupLock.release()
                # Propagate up to thread's run() method.
                raise RuntimeError(message)

            # Bad - there's no more snapshots left and nothing
            # left to delete. We don't disable the service since
            # it will permit self recovery and snapshot
            # retention when space becomes available on
            # the pool (hopefully).
            util.debug("%s pool status after cleanup:" \
                       % zpool.name, \
                       self.verbose)
            util.debug(zpool.name, self.verbose)
        util.debug("Cleanup completed. %d snapshots were destroyed" \
                   % len(self._destroyedsnaps), \
                   self.verbose)
        # Avoid needless list iteration for non-debug mode
        if self.verbose == True and len(self._destroyedsnaps) > 0:
            for snap in self._destroyedsnaps:
                sys.stderr.write("\t%s\n" % snap)
        self._cleanupLock.release()
Пример #13
0
 def is_running(self):
     """Return True if the plugin's child process exists and has not
     yet terminated."""
     if self._proc == None:
         util.debug("Plugin child process is not started", self.verbose)
         return False
     # poll() refreshes returncode without blocking.
     self._proc.poll()
     if self._proc.returncode == None:
         util.debug("Plugin child process is still running",
                    self.verbose)
         return True
     util.debug("Plugin child process has ended", self.verbose)
     return False
Пример #14
0
 def is_running(self):
     """Check whether a previously launched plugin subprocess is
     still alive."""
     proc = self._proc
     if proc == None:
         util.debug("Plugin child process is not started", self.verbose)
         return False
     # Update returncode without waiting for the child.
     proc.poll()
     alive = proc.returncode == None
     if alive:
         util.debug("Plugin child process is still running",
                    self.verbose)
     else:
         util.debug("Plugin child process has ended", self.verbose)
     return alive
Пример #15
0
    def execute(self, schedule, label):
        """Launch the plugin's trigger command for a snapshot event.

        Runs only when the plugin subscribes to this schedule (or to
        "all"), isn't already running, and its SMF instance is neither
        disabled nor in maintenance. Snapshot context is handed to the
        child through the AUTOSNAP_FMRI, AUTOSNAP_LABEL and PLUGIN_FMRI
        environment variables.

        Raises:
            RuntimeError: if the trigger command cannot be spawned.
        """
        triggers = self.smfInst.get_trigger_list()
        try:
            triggers.index("all")
        except ValueError:
            try:
                triggers.index(schedule)
            except ValueError:
                # Plugin is not subscribed to this schedule.
                return

        # Skip if already running
        if self.is_running() == True:
            util.debug("Plugin: %s is already running. Skipping execution" \
                       % (self.smfInst.instanceName), \
                       self.verbose)
            return
        # Skip if plugin FMRI has been disabled or placed into maintenance
        cmd = [smf.SVCSCMD, "-H", "-o", "state", self.smfInst.instanceName]
        outdata, errdata = util.run_command(cmd)
        state = outdata.strip()
        if state == "disabled" or state == "maintenance":
            util.debug("Plugin: %s is in %s state. Skipping execution" \
                       % (self.smfInst.instanceName, state), \
                       self.verbose)
            return

        cmd = self.smfInst.get_trigger_command()
        util.debug("Executing plugin command: %s" % str(cmd), self.verbose)
        svcFmri = "%s:%s" % (autosnapsmf.BASESVC, schedule)

        os.putenv("AUTOSNAP_FMRI", svcFmri)
        os.putenv("AUTOSNAP_LABEL", label)
        try:
            os.putenv("PLUGIN_FMRI", self.smfInst.instanceName)
            self._proc = subprocess.Popen(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          close_fds=True,
                                          universal_newlines=True)
        except OSError as message:
            # Bug fix: the original cleared self._proc AFTER the raise,
            # leaving the assignment unreachable. Reset it first so a
            # failed spawn isn't mistaken for a live child.
            self._proc = None
            raise RuntimeError("%s subprocess error:\n %s" % \
                                (cmd, str(message)))
Пример #16
0
    def execute(self, schedule, label):
        """Run the plugin's trigger command for a snapshot event.

        Skipped when the plugin doesn't subscribe to this schedule (or
        "all"), is already running, or its SMF instance is disabled or
        in maintenance. Snapshot context is exported via AUTOSNAP_FMRI,
        AUTOSNAP_LABEL and PLUGIN_FMRI environment variables.

        Raises:
            RuntimeError: if the trigger command cannot be spawned.
        """
        triggers = self.smfInst.get_trigger_list()
        try:
            triggers.index("all")
        except ValueError:
            try:
                triggers.index(schedule)
            except ValueError:
                # Plugin is not subscribed to this schedule.
                return

        # Skip if already running
        if self.is_running() == True:
            util.debug("Plugin: %s is already running. Skipping execution" \
                       % (self.smfInst.instanceName), \
                       self.verbose)
            return
        # Skip if plugin FMRI has been disabled or placed into maintenance
        cmd = [smf.SVCSCMD, "-H", "-o", "state", self.smfInst.instanceName]
        outdata, errdata = util.run_command(cmd)
        state = outdata.strip()
        if state == "disabled" or state == "maintenance":
            util.debug("Plugin: %s is in %s state. Skipping execution" \
                       % (self.smfInst.instanceName, state), \
                       self.verbose)
            return

        cmd = self.smfInst.get_trigger_command()
        util.debug("Executing plugin command: %s" % str(cmd), self.verbose)
        svcFmri = "%s:%s" % (autosnapsmf.BASESVC, schedule)

        os.putenv("AUTOSNAP_FMRI", svcFmri)
        os.putenv("AUTOSNAP_LABEL", label)
        try:
            os.putenv("PLUGIN_FMRI", self.smfInst.instanceName)
            self._proc = subprocess.Popen(cmd,
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE,
                                          close_fds=True)
        except OSError as message:
            # Bug fix: the original reset self._proc only AFTER the
            # raise (unreachable). Clear it first, and use the portable
            # call-form raise instead of the Python 2-only statement.
            self._proc = None
            raise RuntimeError("%s subprocess error:\n %s" % \
                                (cmd, str(message)))
Пример #17
0
def main(argv):
    """Plugin entry point: mark freshly taken snapshots as pending rsync.

    Reads the snapshot context exported by time-sliderd from the
    environment (AUTOSNAP_LABEL, AUTOSNAP_FMRI, PLUGIN_FMRI), then sets
    the org.opensolaris:time-slider-plugin:<instance> user property to
    "pending" on every matching snapshot whose filesystem is tagged for
    rsync backup and currently mounted.

    Exits with -1 if any of the required environment variables are
    missing.
    """
    # Check that appropriate environment variables have been
    # provided by time-sliderd
    #
    # The label used for the snapshot set just taken, ie. the
    # component proceeding the "@" in the snapshot name
    snaplabel = os.getenv("AUTOSNAP_LABEL")
    # The SMF fmri of the auto-snapshot instance corresponding to
    # the snapshot set just taken.
    snapfmri = os.getenv("AUTOSNAP_FMRI")
    # The SMF fmri of the time-slider plugin instance associated with
    # this command.
    pluginfmri = os.getenv("PLUGIN_FMRI")

    if pluginfmri == None:
        sys.stderr.write("No time-slider plugin SMF instance FMRI defined. " \
                         "This plugin does not support command line "
                         "execution. Exiting\n")
        sys.exit(-1)
    syslog.openlog(pluginfmri, 0, syslog.LOG_DAEMON)

    # Honour the plugin instance's verbose SMF property for debugging.
    cmd = [smf.SVCPROPCMD, "-p", verboseprop, pluginfmri]
    outdata, errdata = util.run_command(cmd)
    if outdata.rstrip() == "true":
        verbose = True
    else:
        verbose = False

    if snaplabel == None:
        log_error(syslog.LOG_ERR, "No snapshot label provided. Exiting")
        sys.exit(-1)
    if snapfmri == None:
        log_error(syslog.LOG_ERR,
                  "No auto-snapshot SMF instance FMRI provided. Exiting")
        sys.exit(-1)

    # The schedule name is the last FMRI component, eg. "hourly".
    schedule = snapfmri.rsplit(':', 1)[1]
    plugininstance = pluginfmri.rsplit(':', 1)[1]

    # The user property/tag used when tagging and holding zfs datasets
    propname = "%s:%s" % (propbasename, plugininstance)

    # Identifying snapshots is a 3 stage process.
    #
    # First: identify all snapshots matching the AUTOSNAP_LABEL
    # value passed in by the time-slider daemon.
    #
    # Second: Filter out snapshots of volumes, since rsync can only
    # back up filesystems.
    #
    # Third: we need to filter the results and ensure that the
    # filesystem corresponding to each snapshot is actually
    # tagged with the property (com.sun:auto-snapshot<:schedule>)
    #
    # This is necessary to avoid confusion whereby a snapshot might
    # have been sent|received from one zpool to another on the same
    # system. The received snapshot will show up in the first pass
    # results but is not actually part of the auto-snapshot set
    # created by time-slider. It also avoids incorrectly placing
    # zfs holds on the imported snapshots.

    datasets = zfs.Datasets()
    candidates = datasets.list_snapshots(snaplabel)
    autosnapsets = datasets.list_auto_snapshot_sets(schedule)
    # Restrict to filesystems (not volumes) in the auto-snapshot sets.
    autosnapfs = [name for [name,mount] in datasets.list_filesystems() \
                   if name in autosnapsets]
    snappeddatasets = []
    snapnames = [name for [name,ctime] in candidates \
                 if name.split('@',1)[0] in autosnapfs]

    # Mark the snapshots with a user property. Doing this instead of
    # placing a physical hold on the snapshot allows time-slider to
    # expire the snapshots naturally or destroy them if a zpool fills
    # up and triggers a remedial cleanup.
    # It also prevents the possiblity of leaving snapshots lying around
    # indefinitely on the system if the plugin SMF instance becomes
    # disabled or having to release a pile of held snapshots.
    # We set org.opensolaris:time-slider-plugin:<instance> to "pending",
    # indicate
    # NOTE(review): snappeddatasets and snapshots are initialised here
    # but never consumed below — this excerpt appears truncated; confirm
    # against the full upstream script before relying on it.
    snapshots = []
    for snap in snapnames:
        snapshot = zfs.Snapshot(snap)
        fs = zfs.Filesystem(snapshot.fsname)
        if fs.get_user_property(rsyncsmf.RSYNCFSTAG) == "true":
            if fs.is_mounted() == True:
                snapshot.set_user_property(propname, "pending")
                util.debug("Marking %s as pending rsync" % (snap), verbose)
            else:
                util.debug("Ignoring snapshot of unmounted fileystem: %s" \
                           % (snap), verbose)
Пример #18
0
    def _configure_svc_props(self):
        """Load daemon configuration from SMF service properties.

        Populates verbosity, the remedial-cleanup flag and the three
        cleanup threshold levels (falling back to 80/90/95% defaults on
        error), the keep-empty-snapshots flag, the snapshot label
        separator/prefix, and the list of healthy zpools to monitor.

        Raises:
            RuntimeError: if the system's zpools cannot be listed.
        """
        try:
            self.verbose = self._smf.get_verbose()
        except RuntimeError as message:
            # Non-fatal: default to quiet operation.
            sys.stderr.write("Error determing whether debugging is enabled\n")
            self.verbose = False

        try:
            cleanup = self._smf.get_remedial_cleanup()
            warn = self._smf.get_cleanup_level("warning")
            util.debug("Warning level value is:   %d%%" % warn, self.verbose)
            crit = self._smf.get_cleanup_level("critical")
            util.debug("Critical level value is:  %d%%" % crit, self.verbose)
            emer = self._smf.get_cleanup_level("emergency")
            util.debug("Emergency level value is: %d%%" % emer, self.verbose)
        except RuntimeError as message:
            sys.stderr.write("Failed to determine cleanup threshhold levels\n")
            sys.stderr.write("Details:\n" + \
                             "--------BEGIN ERROR MESSAGE--------\n" + \
                             str(message) + \
                             "\n---------END ERROR MESSAGE---------\n")
            sys.stderr.write("Using factory defaults of 80%, 90% and 95%\n")
            #Go with defaults
            #FIXME - this would be an appropriate case to mark svc as degraded
            self._remedialCleanup = True
            self._warningLevel = 80
            self._criticalLevel = 90
            self._emergencyLevel = 95
        else:
            # Only commit the values once ALL lookups above succeeded.
            self._remedialCleanup = cleanup
            self._warningLevel = warn
            self._criticalLevel = crit
            self._emergencyLevel = emer

        try:
            self._keepEmpties = self._smf.get_keep_empties()
        except RuntimeError as message:
            # Not fatal, just assume we delete them (default configuration)
            sys.stderr.write(
                "Can't determine whether to keep empty snapshots\n")
            sys.stderr.write("Details:\n" + \
                             "--------BEGIN ERROR MESSAGE--------\n" + \
                             str(message) + \
                             "\n---------END ERROR MESSAGE---------\n")
            sys.stderr.write("Assuming default value: False\n")
            self._keepEmpties = False

        # Previously, snapshot labels used the ":" character was used as a
        # separator character for datestamps. Windows filesystems such as
        # CIFS and FAT choke on this character so now we use a user definable
        # separator value, with a default value of "_"
        # We need to check for both the old and new format when looking for
        # snapshots.
        self._separator = self._smf.get_separator()
        # Bracket expression matches either the legacy ":" or the
        # configured separator when scanning snapshot names.
        self._prefix = "%s[:%s]" \
            % (autosnapsmf.SNAPLABELPREFIX, self._separator)

        # Rebuild pool list
        self._zpools = []
        try:
            for poolname in zfs.list_zpools():
                # Do not try to examine FAULTED pools
                zpool = zfs.ZPool(poolname)
                if zpool.health == "FAULTED":
                    util.debug("Ignoring faulted Zpool: %s\n" \
                               % (zpool.name), \
                               self.verbose)
                else:
                    self._zpools.append(zpool)
                util.debug(str(zpool), self.verbose)
        except RuntimeError as message:
            sys.stderr.write("Could not list Zpools\n")
            self.exitCode = smf.SMF_EXIT_ERR_FATAL
            # Propogate exception up to thread's run() method
            raise RuntimeError(message)
Пример #19
0
 def execute_plugins(self, schedule, label):
     """Invoke execute() on every registered plugin for the given
     snapshot schedule and label."""
     util.debug("Executing plugins for \"%s\" with label: \"%s\""
                % (schedule, label),
                self.verbose)
     for entry in self.plugins:
         entry.execute(schedule, label)
Пример #20
0
def main(argv):
    """Send the just-taken snapshot set to a user-configured backup command.

    Reads three environment variables supplied by the time-slider daemon:

    AUTOSNAP_LABEL  Label of the snapshot set just taken, ie. the
                    component following the "@" in the snapshot name.
    AUTOSNAP_FMRI   SMF fmri of the auto-snapshot instance corresponding
                    to the snapshot set just taken.
    PLUGIN_FMRI     SMF fmri of the time-slider plugin instance
                    associated with this command.

    For every dataset in the snapshot set, a "zfs send" stream (full, or
    incremental against the previously recorded backup snapshot) is piped
    into the command configured in the plugin's receive/command SMF
    property. On any failure the plugin instance is placed into SMF
    maintenance state and the process exits with a non-zero status.
    """
    snaplabel = os.getenv("AUTOSNAP_LABEL")
    snapfmri = os.getenv("AUTOSNAP_FMRI")
    pluginfmri = os.getenv("PLUGIN_FMRI")

    # Abort early if invoked outside the time-slider daemon context.
    if pluginfmri is None:
        sys.stderr.write("No time-slider plugin SMF instance FMRI defined. " \
                         "This plugin does not support command line "
                         "execution. Exiting\n")
        sys.exit(-1)
    syslog.openlog(pluginfmri, 0, syslog.LOG_DAEMON)

    # Honour the plugin's verbose SMF property for debug output.
    cmd = [smf.SVCPROPCMD, "-p", verboseprop, pluginfmri]
    outdata,errdata = util.run_command(cmd)
    verbose = (outdata.rstrip() == "true")

    if snaplabel is None:
        log_error(syslog.LOG_ERR,
                  "No snapshot label defined. Exiting")
        sys.exit(-1)
    if snapfmri is None:
        log_error(syslog.LOG_ERR,
                  "No auto-snapshot SMF instance FMRI defined. Exiting")
        sys.exit(-1)

    # The schedule name and the plugin instance name are the final
    # components of their respective FMRIs.
    schedule = snapfmri.rsplit(':', 1)[1]
    plugininstance = pluginfmri.rsplit(':', 1)[1]

    # The user property/tag used when tagging and holding zfs datasets
    propname = "%s:%s" % (propbasename, plugininstance)

    # Identifying snapshots is a two stage process.
    #
    # First: identify all snapshots matching the AUTOSNAP_LABEL
    # value passed in by the time-slider daemon.
    #
    # Second: we need to filter the results and ensure that the
    # filesystem/volume corresponding to each snapshot is actually
    # tagged with the property (com.sun:auto-snapshot<:schedule>)
    #
    # This is necessary to avoid confusion whereby a snapshot might
    # have been sent|received from one zpool to another on the same
    # system. The received snapshot will show up in the first pass
    # results but is not actually part of the auto-snapshot set
    # created by time-slider. It also avoids incorrectly placing
    # zfs holds on the imported snapshots.

    datasets = zfs.Datasets()
    candidates = datasets.list_snapshots(snaplabel)
    originsets = datasets.list_auto_snapshot_sets(schedule)
    snappeddatasets = []
    snapnames = [name for [name,ctime] in candidates \
                 if name.split('@',1)[0] in originsets]


    # Place a hold on the newly created snapshots so
    # they can be backed up without fear of being destroyed
    # before the backup gets a chance to complete.
    for snap in snapnames:
        snapshot = zfs.Snapshot(snap)
        if propname not in snapshot.holds():
            util.debug("Placing hold on %s" % (snap), verbose)
            snapshot.hold(propname)
        datasetname = snapshot.fsname
        # Insert datasetnames in alphabetically sorted order because
        # zfs receive falls over if it receives a child before the
        # parent if the "-F" option is not used.
        insort(snappeddatasets, datasetname)

    # Find out the receive command property value
    cmd = [smf.SVCPROPCMD, "-c", "-p", "receive/command", pluginfmri]
    outdata,errdata = util.run_command(cmd)
    # Strip out '\' characters inserted by svcprop
    recvcmd = outdata.strip().replace('\\', '').split()

    # Check to see if the receive command is accessible and executable
    try:
        statinfo = os.stat(recvcmd[0])
        # Check the "other" execute bit (octal 01 in the original code).
        other_x = (statinfo.st_mode & 0o1)
        if other_x == 0:
            log_error(syslog.LOG_ERR,
                      "Plugin: %s: Configured receive/command is not " \
                      "executable: %s" \
                      % (pluginfmri, outdata))
            maintenance(pluginfmri)
            sys.exit(-1)
    except OSError:
        log_error(syslog.LOG_ERR,
                  "Plugin: %s: Can not access the configured " \
                  "receive/command: %s" \
                  % (pluginfmri, outdata))
        maintenance(pluginfmri)
        sys.exit(-1)

    for dataset in snappeddatasets:
        sendcmd = None
        prevsnapname = None
        ds = zfs.ReadableDataset(dataset)
        prevlabel = ds.get_user_property(propname)

        snapname = "%s@%s" % (ds.name, snaplabel)
        if (prevlabel is None or prevlabel == '-' or len(prevlabel) == 0):
            # No previous backup - send a full replication stream
            sendcmd = [zfs.ZFSCMD, "send", snapname]
            util.debug("No previous backup registered for %s" % ds.name, verbose)
        else:
            # A record of a previous backup exists.
            # Check that it exists to enable send of an incremental stream.
            prevsnapname = "%s@%s" % (ds.name, prevlabel)
            util.debug("Previously sent snapshot: %s" % prevsnapname, verbose)
            prevsnap = zfs.Snapshot(prevsnapname)
            if prevsnap.exists():
                sendcmd = [zfs.ZFSCMD, "send", "-i", prevsnapname, snapname]
            else:
                # This should not happen under normal operation since we
                # place a hold on the snapshot until it gets sent. So
                # getting here suggests that something else released the
                # hold on the snapshot, allowing it to get destroyed
                # prematurely.
                log_error(syslog.LOG_ERR,
                          "Previously sent snapshot no longer exists: %s" \
                          % prevsnapname)
                maintenance(pluginfmri)
                sys.exit(-1)

        # Invoke the send and receive commands via pfexec(1) since
        # we are not using the role's shell to take care of that
        # for us.
        sendcmd.insert(0, smf.PFCMD)
        recvcmd.insert(0, smf.PFCMD)

        try:
            sendP = subprocess.Popen(sendcmd,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     close_fds=True)
            recvP = subprocess.Popen(recvcmd,
                                     stdin=sendP.stdout,
                                     stderr=subprocess.PIPE,
                                     close_fds=True)

            # NOTE(review): recvP is drained before sendP; if the sender
            # fills its stderr pipe first this could deadlock — confirm
            # zfs send's stderr output stays small in practice.
            recvout,recverr = recvP.communicate()
            recverrno = recvP.wait()
            sendout,senderr = sendP.communicate()
            senderrno = sendP.wait()

            if senderrno != 0:
                raise RuntimeError("Send command: %s failed with exit code "
                                   "%d. Error message: \n%s"
                                   % (str(sendcmd), senderrno, senderr))
            if recverrno != 0:
                raise RuntimeError("Receive command %s failed with exit "
                                   "code %d. Error message: \n%s"
                                   % (str(recvcmd), recverrno, recverr))

            # The previous backup snapshot is no longer needed as an
            # incremental base, so drop our hold on it.
            if prevsnapname is not None:
                snapshot = zfs.Snapshot(prevsnapname)
                util.debug("Releasing hold on previous snapshot: %s" \
                      % (prevsnapname),
                      verbose)
                snapshot.release(propname)
        except Exception as message:
            log_error(syslog.LOG_ERR,
                      "Error during snapshot send/receive operation: %s" \
                      % (message))

            maintenance(pluginfmri)
            sys.exit(-1)

        # Finally, after success, make a record of the latest backup
        # and release the old snapshot.
        ds.set_user_property(propname, snaplabel)
        util.debug("Sending of \"%s\" snapshot streams completed" \
              % (snaplabel),
              verbose)
Пример #21
0
    def _update_schedules(self):
        """Recompute the next snapshot due time for each schedule.

        For every schedule in self._allSchedules, derives
        self._next[schedule] as <timestamp of last snapshot> + <interval>.
        Fixed-length intervals are looked up in the module-level
        ``intervals`` table; "months" intervals are computed with calendar
        arithmetic since month lengths vary.

        Raises RuntimeError (and sets self.exitCode) if snapshot listing
        fails or a schedule has an interval name not in ``intervals``.
        """
        interval = 0
        idx = 1  # Used to index subsets for schedule overlap calculation
        last = None

        for schedule, interval, period, keep in self._allSchedules:
            # Shortcut if we've already processed this schedule and it's
            # still up to date. Don't skip the default schedules though
            # because overlap affects their scheduling
            if [schedule,interval,period,keep] not in \
                self._defaultSchedules and \
                (self._next[schedule] > self._last[schedule]):
                util.debug("Short circuiting %s recalculation" \
                           % (schedule), \
                           self.verbose)
                continue

            # If we don't have an internal timestamp for the given schedule
            # ask zfs for the last snapshot and get it's creation timestamp.
            if self._last[schedule] == 0:
                try:
                    snaps = self._datasets.list_snapshots("%s%s" % \
                                                         (self._prefix,
                                                          schedule))
                except RuntimeError as message:
                    self.exitCode = smf.SMF_EXIT_ERR_FATAL
                    sys.stderr.write(
                        "Failed to list snapshots during schedule update\n")
                    #Propogate up to the thread's run() method
                    raise RuntimeError(message)

                if len(snaps) > 0:
                    util.debug("Last %s snapshot was: %s" % \
                               (schedule, snaps[-1][0]), \
                               self.verbose)
                    # snaps is (name, ctime) pairs; keep the newest ctime.
                    self._last[schedule] = snaps[-1][1]

            last = self._last[schedule]
            if interval != "months":  # months is non-constant. See below.
                util.debug("Recalculating %s schedule" % (schedule), \
                           self.verbose)
                try:
                    totalinterval = intervals[interval] * period
                except KeyError:
                    self.exitCode = smf.SMF_EXIT_ERR_CONFIG
                    sys.stderr.write(schedule + \
                                      " schedule has invalid interval: " + \
                                      "'%s\'\n" % interval)
                    #Propogate up to thread's run() method
                    raise RuntimeError
                if [schedule, interval, period,
                        keep] in self._defaultSchedules:
                    # This is one of the default schedules so check for an
                    # overlap with one of the dominant schedules.
                    for s, i, p, k in self._defaultSchedules[:idx]:
                        last = max(last, self._last[s])
                    idx += 1

            else:  # interval == "months"
                if self._next[schedule] > last:
                    util.debug("Short circuiting " + \
                               schedule + \
                               " recalculation", \
                               self.verbose)
                    continue
                util.debug("Recalculating %s schedule" % (schedule), \
                           self.verbose)
                snap_tm = time.gmtime(self._last[schedule])
                # Increment year if period >= than 1 calender year.
                year = snap_tm.tm_year
                year += int(period / 12)
                period = period % 12

                mon = (snap_tm.tm_mon + period) % 12
                # Result of 0 actually means december.
                if mon == 0:
                    mon = 12
                # Account for period that spans calendar year boundary.
                elif snap_tm.tm_mon + period > 12:
                    year += 1

                # Clamp the day-of-month so that e.g. Jan 31 + 1 month
                # lands on the last day of February rather than overflowing.
                d, dlastmon = calendar.monthrange(snap_tm.tm_year,
                                                  snap_tm.tm_mon)
                d, dnewmon = calendar.monthrange(year, mon)
                mday = snap_tm.tm_mday
                if dlastmon > dnewmon and snap_tm.tm_mday > dnewmon:
                    mday = dnewmon

                tm =(year, mon, mday, \
                    snap_tm.tm_hour, snap_tm.tm_min, snap_tm.tm_sec, \
                    0, 0, -1)
                newt = calendar.timegm(tm)
                new_tm = time.gmtime(newt)
                totalinterval = newt - self._last[schedule]

            self._next[schedule] = last + totalinterval
Пример #22
0
    def _run_cleanup(self, zpool, schedule, threshold):
        """Destroy snapshots of the given schedule, oldest first, until
        the pool's capacity drops to ``threshold`` percent or no
        snapshots of that schedule remain.

        Cloned snapshots are excluded since destroying them would fail.
        Names of destroyed snapshots are appended to
        self._destroyedsnaps. Raises RuntimeError (and sets
        self.exitCode) if the snapshot listing itself fails.
        """
        clonedsnaps = []
        snapshots = []
        try:
            clonedsnaps = self._datasets.list_cloned_snapshots()
        except RuntimeError as message:
            # Non-fatal: without the clone list we may later fail to
            # destroy an individual cloned snapshot, but cleanup proceeds.
            sys.stderr.write("Error (non-fatal) listing cloned snapshots" +
                             " while recovering pool capacity\n")
            sys.stderr.write("Error details:\n" + \
                             "--------BEGIN ERROR MESSAGE--------\n" + \
                             str(message) + \
                             "\n--------END ERROR MESSAGE--------\n")

        # Build a list of snapshots in the given schedule, that are not
        # cloned, and sort the result in reverse chronological order.
        try:
            snapshots = [s for s,t in \
                            zpool.list_snapshots("%s%s" \
                            % (self._prefix,schedule)) \
                            if not s in clonedsnaps]
            snapshots.reverse()
        except RuntimeError as message:
            sys.stderr.write("Error listing snapshots" +
                             " while recovering pool capacity\n")
            self.exitCode = smf.SMF_EXIT_ERR_FATAL
            # Propogate the error up to the thread's run() method.
            raise RuntimeError(message)

        while zpool.get_capacity() > threshold:
            if len(snapshots) == 0:
                syslog.syslog(syslog.LOG_NOTICE,
                              "No more %s snapshots left" \
                               % schedule)
                return
            """This is not an exact science. Deleteing a zero sized 
            snapshot can have unpredictable results. For example a
            pair of snapshots may share exclusive reference to a large
            amount of data (eg. a large core file). The usage of both
            snapshots will initially be seen to be 0 by zfs(1). Deleting
            one of the snapshots will make the data become unique to the
            single remaining snapshot that references it uniquely. The
            remaining snapshot's size will then show up as non zero. So
            deleting 0 sized snapshot is not as pointless as it might seem.
            It also means we have to loop through this, each snapshot set
            at a time and observe the before and after results. Perhaps
            better way exists...."""

            # Start with the oldest first
            snapname = snapshots.pop()
            snapshot = zfs.Snapshot(snapname)
            # It would be nicer, for performance purposes, to delete sets
            # of snapshots recursively but this might destroy more data than
            # absolutely necessary, plus the previous purging of zero sized
            # snapshots can easily break the recursion chain between
            # filesystems.
            # On the positive side there should be fewer snapshots and they
            # will mostly non-zero so we should get more effectiveness as a
            # result of deleting snapshots since they should be nearly always
            # non zero sized.
            util.debug("Destroying %s" % snapname, self.verbose)
            try:
                snapshot.destroy()
            except RuntimeError as message:
                # Would be nice to be able to mark service as degraded here
                # but it's better to try to continue on rather than to give
                # up alltogether (SMF maintenance state)
                sys.stderr.write("Warning: Cleanup failed to destroy: %s\n" % \
                                 (snapshot.name))
                sys.stderr.write("Details:\n%s\n" % (str(message)))
            else:
                self._destroyedsnaps.append(snapname)
            # Give zfs some time to recalculate.
            time.sleep(3)
Пример #23
0
    def _prune_snapshots(self, dataset, schedule):
        """Clean out zero sized snapshots, kind of cautiously.

        First (unless self._keepEmpties is set) destroys every zero-sized
        snapshot of the given schedule except the most recent one, then
        destroys the oldest remaining snapshots until only
        self._keep[schedule] are left. Raises RuntimeError (and sets
        self.exitCode) on listing or destroy failures.
        """
        # Per schedule: We want to delete 0 sized
        # snapshots but we need to keep at least one around (the most
        # recent one) for each schedule so that that overlap is
        # maintained from frequent -> hourly -> daily etc.
        # Start off with the smallest interval schedule first and
        # move up. This increases the amount of data retained where
        # several snapshots are taken together like a frequent hourly
        # and daily snapshot taken at 12:00am. If 3 snapshots are all
        # identical and reference the same identical data they will all
        # be initially reported as zero for used size. Deleting the
        # daily first then the hourly would shift make the data referenced
        # by all 3 snapshots unique to the frequent scheduled snapshot.
        # This snapshot would probably be purged within an how ever and the
        # data referenced by it would be gone for good.
        # Doing it the other way however ensures that the data should
        # remain accessible to the user for at least a week as long as
        # the pool doesn't run low on available space before that.

        try:
            snaps = dataset.list_snapshots("%s%s" % (self._prefix, schedule))
            # Clone the list because we want to remove items from it
            # while iterating through it.
            remainingsnaps = snaps[:]
        except RuntimeError as message:
            sys.stderr.write(
                "Failed to list snapshots during snapshot cleanup\n")
            self.exitCode = smf.SMF_EXIT_ERR_FATAL
            raise RuntimeError(message)

        if (self._keepEmpties == False):
            try:  # remove the newest one from the list.
                snaps.pop()
            except IndexError:
                pass
            for snapname in snaps:
                try:
                    snapshot = zfs.Snapshot(snapname)
                except Exception as message:
                    sys.stderr.write(str(message))
                    # Not fatal, just skip to the next snapshot
                    continue

                try:
                    if snapshot.get_used_size() == 0:
                        util.debug("Destroying zero sized: " + snapname, \
                                   self.verbose)
                        try:
                            snapshot.destroy()
                        except RuntimeError as message:
                            sys.stderr.write("Failed to destroy snapshot: " +
                                             snapname + "\n")
                            self.exitCode = smf.SMF_EXIT_MON_DEGRADE
                            # Propogate exception so thread can exit
                            raise RuntimeError(message)
                        remainingsnaps.remove(snapname)
                except RuntimeError as message:
                    sys.stderr.write("Can not determine used size of: " + \
                                     snapname + "\n")
                    self.exitCode = smf.SMF_EXIT_MON_DEGRADE
                    #Propogate the exception to the thead run() method
                    raise RuntimeError(message)

        # Deleting individual snapshots instead of recursive sets
        # breaks the recursion chain and leaves child snapshots
        # dangling so we need to take care of cleaning up the
        # snapshots.
        # Destroy the oldest surviving snapshots until only the
        # configured retention count for this schedule remains.
        target = len(remainingsnaps) - self._keep[schedule]
        counter = 0
        while counter < target:
            util.debug("Destroy expired snapshot: " + \
                       remainingsnaps[counter],
                       self.verbose)
            try:
                snapshot = zfs.Snapshot(remainingsnaps[counter])
            except Exception as message:
                sys.stderr.write(str(message))
                # Not fatal, just skip to the next snapshot
                counter += 1
                continue
            try:
                snapshot.destroy()
            except RuntimeError as message:
                sys.stderr.write("Failed to destroy snapshot: " +
                                 snapshot.name + "\n")
                self.exitCode = smf.SMF_EXIT_ERR_FATAL
                # Propogate exception so thread can exit
                raise RuntimeError(message)
            else:
                counter += 1
Пример #24
0
 def execute_plugins(self, schedule, label):
     """Notify every registered plugin of a completed snapshot set.

     Arguments:
         schedule -- the auto-snapshot schedule name
         label    -- the label of the snapshot set just taken
     """
     debug_fmt = "Executing plugins for \"%s\" with label: \"%s\""
     util.debug(debug_fmt % (schedule, label), self.verbose)
     for handler in self.plugins:
         handler.execute(schedule, label)
Пример #25
0
def main(argv):
    """Send the just-taken snapshot set to a user-configured backup command.

    Reads three environment variables supplied by the time-slider daemon:

    AUTOSNAP_LABEL  Label of the snapshot set just taken, ie. the
                    component following the "@" in the snapshot name.
    AUTOSNAP_FMRI   SMF fmri of the auto-snapshot instance corresponding
                    to the snapshot set just taken.
    PLUGIN_FMRI     SMF fmri of the time-slider plugin instance
                    associated with this command.

    For every dataset in the snapshot set, a "zfs send" stream (full, or
    incremental against the previously recorded backup snapshot) is piped
    into the command configured in the plugin's receive/command SMF
    property. On any failure the plugin instance is placed into SMF
    maintenance state and the process exits with a non-zero status.
    """
    snaplabel = os.getenv("AUTOSNAP_LABEL")
    snapfmri = os.getenv("AUTOSNAP_FMRI")
    pluginfmri = os.getenv("PLUGIN_FMRI")

    # Abort early if invoked outside the time-slider daemon context.
    if pluginfmri is None:
        sys.stderr.write("No time-slider plugin SMF instance FMRI defined. " \
                         "This plugin does not support command line "
                         "execution. Exiting\n")
        sys.exit(-1)
    syslog.openlog(pluginfmri, 0, syslog.LOG_DAEMON)

    # Honour the plugin's verbose SMF property for debug output.
    cmd = [smf.SVCPROPCMD, "-p", verboseprop, pluginfmri]
    outdata,errdata = util.run_command(cmd)
    verbose = (outdata.rstrip() == "true")

    if snaplabel is None:
        log_error(syslog.LOG_ERR,
                  "No snapshot label defined. Exiting")
        sys.exit(-1)
    if snapfmri is None:
        log_error(syslog.LOG_ERR,
                  "No auto-snapshot SMF instance FMRI defined. Exiting")
        sys.exit(-1)

    # The schedule name and the plugin instance name are the final
    # components of their respective FMRIs.
    schedule = snapfmri.rsplit(':', 1)[1]
    plugininstance = pluginfmri.rsplit(':', 1)[1]

    # The user property/tag used when tagging and holding zfs datasets
    propname = "%s:%s" % (propbasename, plugininstance)

    # Identifying snapshots is a two stage process.
    #
    # First: identify all snapshots matching the AUTOSNAP_LABEL
    # value passed in by the time-slider daemon.
    #
    # Second: we need to filter the results and ensure that the
    # filesystem/volume corresponding to each snapshot is actually
    # tagged with the property (com.sun:auto-snapshot<:schedule>)
    #
    # This is necessary to avoid confusion whereby a snapshot might
    # have been sent|received from one zpool to another on the same
    # system. The received snapshot will show up in the first pass
    # results but is not actually part of the auto-snapshot set
    # created by time-slider. It also avoids incorrectly placing
    # zfs holds on the imported snapshots.

    datasets = zfs.Datasets()
    candidates = datasets.list_snapshots(snaplabel)
    originsets = datasets.list_auto_snapshot_sets(schedule)
    snappeddatasets = []
    snapnames = [name for [name,ctime] in candidates \
                 if name.split('@',1)[0] in originsets]


    # Place a hold on the newly created snapshots so
    # they can be backed up without fear of being destroyed
    # before the backup gets a chance to complete.
    for snap in snapnames:
        snapshot = zfs.Snapshot(snap)
        if propname not in snapshot.holds():
            util.debug("Placing hold on %s" % (snap), verbose)
            snapshot.hold(propname)
        datasetname = snapshot.fsname
        # Insert datasetnames in alphabetically sorted order because
        # zfs receive falls over if it receives a child before the
        # parent if the "-F" option is not used.
        insort(snappeddatasets, datasetname)

    # Find out the receive command property value
    cmd = [smf.SVCPROPCMD, "-c", "-p", "receive/command", pluginfmri]
    outdata,errdata = util.run_command(cmd)
    # Strip out '\' characters inserted by svcprop
    recvcmd = outdata.strip().replace('\\', '').split()

    # Check to see if the receive command is accessible and executable
    try:
        statinfo = os.stat(recvcmd[0])
        # Check the "other" execute bit (octal 01 in the original code).
        other_x = (statinfo.st_mode & 0o1)
        if other_x == 0:
            log_error(syslog.LOG_ERR,
                      "Plugin: %s: Configured receive/command is not " \
                      "executable: %s" \
                      % (pluginfmri, outdata))
            maintenance(pluginfmri)
            sys.exit(-1)
    except OSError:
        log_error(syslog.LOG_ERR,
                  "Plugin: %s: Can not access the configured " \
                  "receive/command: %s" \
                  % (pluginfmri, outdata))
        maintenance(pluginfmri)
        sys.exit(-1)

    for dataset in snappeddatasets:
        sendcmd = None
        prevsnapname = None
        ds = zfs.ReadableDataset(dataset)
        prevlabel = ds.get_user_property(propname)

        snapname = "%s@%s" % (ds.name, snaplabel)
        if (prevlabel is None or prevlabel == '-' or len(prevlabel) == 0):
            # No previous backup - send a full replication stream
            sendcmd = [zfs.ZFSCMD, "send", snapname]
            util.debug("No previous backup registered for %s" % ds.name, verbose)
        else:
            # A record of a previous backup exists.
            # Check that it exists to enable send of an incremental stream.
            prevsnapname = "%s@%s" % (ds.name, prevlabel)
            util.debug("Previously sent snapshot: %s" % prevsnapname, verbose)
            prevsnap = zfs.Snapshot(prevsnapname)
            if prevsnap.exists():
                sendcmd = [zfs.ZFSCMD, "send", "-i", prevsnapname, snapname]
            else:
                # This should not happen under normal operation since we
                # place a hold on the snapshot until it gets sent. So
                # getting here suggests that something else released the
                # hold on the snapshot, allowing it to get destroyed
                # prematurely.
                log_error(syslog.LOG_ERR,
                          "Previously sent snapshot no longer exists: %s" \
                          % prevsnapname)
                maintenance(pluginfmri)
                sys.exit(-1)

        # Invoke the send and receive commands via pfexec(1) since
        # we are not using the role's shell to take care of that
        # for us.
        sendcmd.insert(0, smf.PFCMD)
        recvcmd.insert(0, smf.PFCMD)

        try:
            sendP = subprocess.Popen(sendcmd,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE,
                                     close_fds=True)
            recvP = subprocess.Popen(recvcmd,
                                     stdin=sendP.stdout,
                                     stderr=subprocess.PIPE,
                                     close_fds=True)

            # NOTE(review): recvP is drained before sendP; if the sender
            # fills its stderr pipe first this could deadlock — confirm
            # zfs send's stderr output stays small in practice.
            recvout,recverr = recvP.communicate()
            recverrno = recvP.wait()
            sendout,senderr = sendP.communicate()
            senderrno = sendP.wait()

            if senderrno != 0:
                raise RuntimeError("Send command: %s failed with exit code "
                                   "%d. Error message: \n%s"
                                   % (str(sendcmd), senderrno, senderr))
            if recverrno != 0:
                raise RuntimeError("Receive command %s failed with exit "
                                   "code %d. Error message: \n%s"
                                   % (str(recvcmd), recverrno, recverr))

            # The previous backup snapshot is no longer needed as an
            # incremental base, so drop our hold on it.
            if prevsnapname is not None:
                snapshot = zfs.Snapshot(prevsnapname)
                util.debug("Releasing hold on previous snapshot: %s" \
                      % (prevsnapname),
                      verbose)
                snapshot.release(propname)
        except Exception as message:
            log_error(syslog.LOG_ERR,
                      "Error during snapshot send/receive operation: %s" \
                      % (message))

            maintenance(pluginfmri)
            sys.exit(-1)

        # Finally, after success, make a record of the latest backup
        # and release the old snapshot.
        ds.set_user_property(propname, snaplabel)
        util.debug("Sending of \"%s\" snapshot streams completed" \
              % (snaplabel),
              verbose)
Пример #26
0
    def run(self):
        """Main loop of the snapshot manager thread.

        On startup, deselects swap and dump volumes so they are never
        snapshotted. Then loops forever: refresh configuration, run
        remedial cleanup when needed, take any due snapshots, and sleep
        on self._conditionLock until the next snapshot is due (or for 15
        minutes when no auto-snapshot schedules are online). The thread
        exits (setting self.exitCode on OSError) if an OSError or
        RuntimeError propagates out of the loop body.
        """
        # Deselect swap and dump volumes so they don't get snapshotted.
        for vol in self._datasets.list_volumes():
            # rsplit("/") splits on every "/"; name[1] is the second
            # path component, e.g. "swap" in "rpool/swap".
            name = vol.rsplit("/")
            try:
                if (name[1] == "swap" or name[1] == "dump"):
                    util.debug("Auto excluding %s volume" % vol, self.verbose)
                    volume = zfs.Volume(vol)
                    volume.set_auto_snap(False)
            except IndexError:
                # Top-level volume with no "/" component; nothing to exclude.
                pass

        nexttime = None
        waittime = None
        while True:
            try:
                self.refresh()
                # First check and, if necessary, perform any remedial cleanup.
                # This is best done before creating any new snapshots which may
                # otherwise get immediately gobbled up by the remedial cleanup.
                if self._needs_cleanup() == True:
                    self._perform_cleanup()
                    # Check to see if cleanup actually deleted anything before
                    # notifying the user. Avoids the popup appearing continuously
                    if len(self._destroyedsnaps) > 0:
                        self._send_notification()
                    self._send_to_syslog()

                nexttime = self._check_snapshots()
                # Overdue snapshots are already taken automatically
                # inside _check_snapshots() so nexttime should never be
                # < 0. It can be None however, which is fine since it
                # will cause the scheduler thread to sleep indefinitely
                # or until a SIGHUP is caught.
                if nexttime:
                    util.debug("Waiting until " + str(nexttime), self.verbose)
                waittime = None
                if nexttime != None:
                    waittime = nexttime - int(time.time())
                    if (waittime <= 0):
                        # We took too long and missed a snapshot, so break out
                        # and catch up on it the next time through the loop
                        continue
                # waittime could be None if no auto-snap schedules are online
                # NOTE(review): the lock is acquired every iteration but never
                # explicitly released; Condition.wait() drops it while
                # waiting — verify the recursion count cannot grow unbounded.
                self._conditionLock.acquire()
                if waittime:
                    util.debug("Waiting %d seconds" % (waittime), self.verbose)
                    self._conditionLock.wait(waittime)
                else:  #None. Just wait a while to check for cleanups.
                    util.debug("No auto-snapshot schedules online.", \
                               self.verbose)
                    self._conditionLock.wait(_MINUTE * 15)

            except OSError as message:
                sys.stderr.write("Caught OSError exception in snapshot" +
                                 " manager thread\n")
                sys.stderr.write("Error details:\n" + \
                                 "--------BEGIN ERROR MESSAGE--------\n" + \
                                 str(message) + \
                                 "\n--------END ERROR MESSAGE--------\n")
                self.exitCode = smf.SMF_EXIT_ERR_FATAL
                # Exit this thread
                break
            except RuntimeError as message:
                sys.stderr.write("Caught RuntimeError exception in snapshot" +
                                 " manager thread\n")
                sys.stderr.write("Error details:\n" + \
                                 "--------BEGIN ERROR MESSAGE--------\n" + \
                                 str(message) + \
                                 "\n--------END ERROR MESSAGE--------\n")
                # Exit this thread
                break
Пример #27
0
def main(argv):
    # Check that appropriate environment variables have been
    # provided by time-sliderd
    #
    # The label used for the snapshot set just taken, ie. the
    # component proceeding the "@" in the snapshot name
    snaplabel = os.getenv("AUTOSNAP_LABEL")
    # The SMF fmri of the auto-snapshot instance corresponding to
    # the snapshot set just taken.
    snapfmri = os.getenv("AUTOSNAP_FMRI")
    # The SMF fmri of the time-slider plugin instance associated with
    # this command.
    pluginfmri = os.getenv("PLUGIN_FMRI")

    if pluginfmri == None:
        sys.stderr.write(
            "No time-slider plugin SMF instance FMRI defined. "
            "This plugin does not support command line "
            "execution. Exiting\n"
        )
        sys.exit(-1)
    syslog.openlog(pluginfmri, 0, syslog.LOG_DAEMON)

    cmd = [smf.SVCPROPCMD, "-p", verboseprop, pluginfmri]
    outdata, errdata = util.run_command(cmd)
    if outdata.rstrip() == "true":
        verbose = True
    else:
        verbose = False

    if snaplabel == None:
        log_error(syslog.LOG_ERR, "No snapshot label provided. Exiting")
        sys.exit(-1)
    if snapfmri == None:
        log_error(syslog.LOG_ERR, "No auto-snapshot SMF instance FMRI provided. Exiting")
        sys.exit(-1)

    schedule = snapfmri.rsplit(":", 1)[1]
    plugininstance = pluginfmri.rsplit(":", 1)[1]

    # The user property/tag used when tagging and holding zfs datasets
    propname = "%s:%s" % (propbasename, plugininstance)

    # Identifying snapshots is a 3 stage process.
    #
    # First: identify all snapshots matching the AUTOSNAP_LABEL
    # value passed in by the time-slider daemon.
    #
    # Second: Filter out snapshots of volumes, since rsync can only
    # back up filesystems.
    #
    # Third: we need to filter the results and ensure that the
    # filesystem corresponding to each snapshot is actually
    # tagged with the property (com.sun:auto-snapshot<:schedule>)
    #
    # This is necessary to avoid confusion whereby a snapshot might
    # have been sent|received from one zpool to another on the same
    # system. The received snapshot will show up in the first pass
    # results but is not actually part of the auto-snapshot set
    # created by time-slider. It also avoids incorrectly placing
    # zfs holds on the imported snapshots.

    datasets = zfs.Datasets()
    candidates = datasets.list_snapshots(snaplabel)
    autosnapsets = datasets.list_auto_snapshot_sets(schedule)
    autosnapfs = [name for [name, mount] in datasets.list_filesystems() if name in autosnapsets]
    snappeddatasets = []
    snapnames = [name for [name, ctime] in candidates if name.split("@", 1)[0] in autosnapfs]

    # Mark the snapshots with a user property. Doing this instead of
    # placing a physical hold on the snapshot allows time-slider to
    # expire the snapshots naturally or destroy them if a zpool fills
    # up and triggers a remedial cleanup.
    # It also prevents the possiblity of leaving snapshots lying around
    # indefinitely on the system if the plugin SMF instance becomes
    # disabled or having to release a pile of held snapshots.
    # We set org.opensolaris:time-slider-plugin:<instance> to "pending",
    # indicate
    snapshots = []
    for snap in snapnames:
        snapshot = zfs.Snapshot(snap)
        fs = zfs.Filesystem(snapshot.fsname)
        if fs.get_user_property(rsyncsmf.RSYNCFSTAG) == "true":
            if fs.is_mounted() == True:
                snapshot.set_user_property(propname, "pending")
                util.debug("Marking %s as pending rsync" % (snap), verbose)
            else:
                util.debug("Ignoring snapshot of unmounted fileystem: %s" % (snap), verbose)