Example #1
    def _periodic_reboot_check(self):
        """Check for periodic reboot and take action if necessary.

        Tries to be clever and avoid reboot if connections are up.

        Uptime estimation is annoying: if time is changed on this reboot,
        the estimate may be grossly wrong.  To ensure that we don't reboot
        on the first boot (when time is synchronized) uncontrollably, this
        function also checks that enough watchdog rounds have been run to
        warrant a reboot.  The underlying assumption is that web UI has been
        running continuously, which is currently OK because we don't restart
        it ever (cron watchdog will just reboot if UI is down).

        Staggering of reboot is added by randomizing the "minute" of the
        reboot in the range [0,45] (not [0,60] for leeway).  The "minute"
        is randomized when watchdog is created, so it stays the same every
        time for one reboot.  Note that the stagger is effectively only
        applied to the first reboot attempt; next attempts (e.g. next day
        at designated time) will not have a stagger.

        If more staggering behavior is desired, see XXX below.
        """

        uptime = self.master.get_uptime()
        reboot_required = False
        now = datetime.datetime.utcnow()

        _log.debug('_periodic_reboot_check: uptime=%s' % uptime)
        
        # Check whether UI configuration requires a reboot (time & day match)
        try:
            reboot_limit = uihelpers.compute_periodic_reboot_time()
            reboot_limit += self._periodic_reboot_stagger_delta
            _log.debug('_periodic_reboot_check: reboot limit after stagger: %s' % reboot_limit)
            
            lm = licensemanager.LicenseMonitor()
            count, limit, limit_leeway = lm.count_normal_users()

            # time to periodic reboot (negative = past due)
            diff = reboot_limit - now
            _log.debug('_periodic_reboot_check: periodic reboot diff (limit-now, time to reboot): %s' % str(diff))

            if diff <= datetime.timedelta(0, 0, 0):
                overdue = -diff
                _log.debug('_periodic_reboot_check: periodic reboot is %s overdue' % overdue)
                if count > 0:
                    # there are clients (without license restrictions!), give 24h leeway
                    if overdue < datetime.timedelta(1, 0, 0):  # XXX: hardcoded
                        _log.info('_periodic_reboot_check: want to do a periodic reboot, but there are active clients (%d), skipping' % count)
                    else:
                        _log.warning('_periodic_reboot_check: want to do a periodic reboot, active clients (%d), but leeway over, rebooting anyway' % count)
                        reboot_required = True
                else:
                    _log.warning('_periodic_reboot_check: want to do a periodic reboot, and no active clients, ok')
                    reboot_required = True
        except:
            _log.exception('_periodic_reboot_check: failed when checking for periodic reboot policy')

        # If not within periodic reboot time window (e.g. 02:00-03:00 local time),
        # skip periodic reboot.
        if reboot_required:
            # XXX: better stagger check could be applied here (checked every day)
            if not uihelpers.check_periodic_reboot_time_window(now):
                _log.warning('_periodic_reboot_check: want to do a periodic reboot, but not within periodic reboot time window')
                reboot_required = False
            
        # If uptime exceeds the maximum allowed, reboot regardless of configuration
        if uptime > constants.PERIODIC_REBOOT_MAX_UPTIME:
            _log.warning('_periodic_reboot_check: uptime is too large (%s), requires reboot' % uptime)
            reboot_required = True
        elif uptime < 0.0:
            # negative uptime: ignore it for now; if the diff is great, we'll get a periodic reboot anyway later
            _log.warning('_periodic_reboot_check: uptime is negative (%s), ignoring' % uptime)

        # Sanity check: if we want to reboot, check that enough watchdog rounds
        # have elapsed (roughly 24h).
        if reboot_required:
            rounds = self.get_watchdog_rounds()
            if rounds < constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS:
                _log.warning('_periodic_reboot_check: want to do periodic reboot, but watchdog rounds too low (%d < %d)' % (rounds, constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS))
                reboot_required = False

        # Take action if necessary
        if reboot_required:
            if self._periodic_reboot_started:
                _log.info('_periodic_reboot_check: reboot required but periodic reboot already in progress, no action needed')
            else:
                try:
                    _log.warning('_periodic_reboot_check: periodic reboot started')
                    self._periodic_reboot_started = True
                    self._periodic_reboot_show_warning()
                    helpers.increment_global_status_counter(ns.periodicReboots)
                    helpers.db_flush()
                except:
                    _log.exception('failed to increment counter')

                try:
                    helpers.write_datetime_marker_file(constants.LAST_AUTOMATIC_REBOOT_MARKER_FILE)
                except:
                    _log.exception('failed to write last automatic reboot marker file')

                uihelpers.ui_reboot(constants.WEBUI_PRODUCT_PERIODIC_REBOOT_MESSAGE, skip_update=False, force_update=False, force_fsck=True, delay=120.0)  # XXX: constants
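
# The stagger delta used above (self._periodic_reboot_stagger_delta) is set up
# elsewhere when the watchdog is created.  A minimal sketch of how it could be
# initialized, following the docstring (randomized reboot "minute" in the range
# [0,45]); the helper name below is hypothetical, not part of the actual module:

import random
import datetime

def _compute_periodic_reboot_stagger_delta():
    # Pick the reboot "minute" once, at watchdog creation time, so the stagger
    # stays the same for every check leading up to one reboot.  The range is
    # [0,45] rather than [0,60] to leave leeway inside the reboot window.
    return datetime.timedelta(minutes=random.randint(0, 45))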
Example #2
def _update_snmp():
    """Update SNMP data."""

    from codebay.l2tpserver import licensemanager
    from codebay.l2tpserver import helpers
    from codebay.l2tpserver.webui import uihelpers

    now = datetime.datetime.utcnow()
    st = helpers.get_status()
    global_st = helpers.get_global_status()
    license_info = helpers.get_license_info()

    def _timeticks(td):
        return int(helpers.timedelta_to_seconds(td) * 100.0)

    def _timestamp(dt):
        return datatypes.encode_datetime_to_iso8601_subset(dt)

    def _get_management_conn():
        # XXX: not the best place for this
        if global_st.hasS(ns.managementServerConnection):
            if global_st.getS(ns.managementServerConnection, rdf.Boolean):
                return 1
        return 0
        
    vals = {}

    lm = licensemanager.LicenseMonitor()
    usr_count, usr_limit, usr_limit_leeway, s2s_count, s2s_limit, s2s_limit_leeway = None, None, None, None, None, None
    try:
        usr_count, usr_limit, usr_limit_leeway, s2s_count, s2s_limit, s2s_limit_leeway = lm.count_both_users()
    except:
        _log.exception('cannot get ppp counts for snmp')

    # XXX: this sharing of status code is quite unclean; see uihelpers.get_status_and_substatus() for suggestions
    health_errors = 0
    try:
        status_class, status_text, substatus_class, substatus_text, status_ok = uihelpers.get_status_and_substatus()
        if status_ok:
            health_errors = 0
        else:
            health_errors = 1
    except:
        _log.exception('cannot determine health errors')
    
    for k, l in [ ('vpneaseHealthCheckErrors',       lambda: health_errors),
                  ('vpneaseUserCount',               lambda: usr_count),
                  ('vpneaseSiteToSiteCount',         lambda: s2s_count),
                  ('vpneaseLastMaintenanceReboot',   lambda: _timestamp(helpers.read_datetime_marker_file(constants.LAST_AUTOMATIC_REBOOT_MARKER_FILE))),
                  ('vpneaseNextMaintenanceReboot',   lambda: _timestamp(uihelpers.compute_periodic_reboot_time())),
                  ('vpneaseLastSoftwareUpdate',      lambda: _timestamp(helpers.read_datetime_marker_file(constants.LAST_SUCCESSFUL_UPDATE_MARKER_FILE))),
                  ('vpneaseSoftwareVersion',         lambda: helpers.get_product_version(cache=True, filecache=True)),
                  ('vpneaseCpuUsage',                lambda: int(global_st.getS(ns.cpuUsage, rdf.Float))),
                  ('vpneaseMemoryUsage',             lambda: int(global_st.getS(ns.memoryUsage, rdf.Float))),
                  ('vpneaseVirtualMemoryUsage',      lambda: int(global_st.getS(ns.swapUsage, rdf.Float))),
                  ('vpneaseServiceUptime',           lambda: _timeticks(now - st.getS(ns.startTime, rdf.Datetime))),
                  ('vpneaseHostUptime',              lambda: _timeticks(datetime.timedelta(0, helpers.get_uptime(), 0))),
                  ('vpneasePublicAddress',           lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getAddress().toString()),
                  ('vpneasePublicSubnet',            lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getMask().toString()),
                  ('vpneasePublicMac',               lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.macAddress, rdf.String)),
                  ('vpneasePrivateAddress',          lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getAddress().toString()),
                  ('vpneasePrivateSubnet',           lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getMask().toString()),
                  ('vpneasePrivateMac',              lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.macAddress, rdf.String)),
                  ('vpneaseLicenseKey',              lambda: license_info.getS(ns_ui.licenseKey, rdf.String)),
                  ('vpneaseLicenseString',           lambda: license_info.getS(ns_ui.licenseString, rdf.String)),
                  ('vpneaseLicenseUserLimit',        lambda: usr_limit),
                  ('vpneaseLicenseSiteToSiteLimit',  lambda: s2s_limit),
                  ('vpneaseMaintenanceReboots',      lambda: global_st.getS(ns.periodicReboots, rdf.Integer)),
                  ('vpneaseWatchdogReboots',         lambda: global_st.getS(ns.watchdogReboots, rdf.Integer)),
                  ('vpneaseLicenseServerConnection', _get_management_conn),
                  ]:
        try:
            val = l()
            if val is not None:
                vals[k] = val
        except:
            # these are expected in several cases, so don't spew too much log about them
            # XXX: it would be better if the checkers would figure these out for themselves
            # (when a value is expected and when not)
            _log.info('failed to get snmp value for key %s' % k)
            #_log.exception('failed to get snmp value for key %s' % k)
                  
    keys = vals.keys()
    keys.sort()
    res = ''
    for k in keys:
        res += '%s=%s\n' % (k, vals[k])

    # to ASCII, escaping any non-ASCII chars with XML escapes
    res = res.encode('US-ASCII', 'xmlcharrefreplace')

    f = None
    try:
        f = open(constants.SNMP_DATA_FILE, 'wb')
        f.write(res)
    finally:
        if f:
            f.close()
        f = None
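
# The SNMP data file written above is a plain ASCII "key=value" listing, one
# value per line, sorted by key.  Illustrative contents only (values made up):
#
#   vpneaseCpuUsage=7
#   vpneaseHealthCheckErrors=0
#   vpneaseUserCount=3
#
# A minimal sketch of reading such a file back into a dict, e.g. with
# _read_snmp_data(constants.SNMP_DATA_FILE).  The helper is hypothetical, not
# part of the module above:

def _read_snmp_data(path):
    vals = {}
    f = open(path, 'rb')
    try:
        for line in f.read().splitlines():
            if '=' in line:
                k, v = line.split('=', 1)
                vals[k] = v
    finally:
        f.close()
    return vals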