def _periodic_reboot_check(self): """Check for periodic reboot and take action if necessary. Tries to be clever and avoid reboot if connections are up. Uptime estimation is annoying: if time is changed on this reboot, the estimate may be grossly wrong. To ensure that we don't reboot on the first boot (when time is synchronized) uncontrollably, this function also checks that enough watchdog rounds have been run to warrant a reboot. The underlying assumption is that web UI has been running continuously, which is currently OK because we don't restart it ever (cron watchdog will just reboot if UI is down). Staggering of reboot is added by randomizing the "minute" of the reboot in the range [0,45] (not [0,60] for leeway). The "minute" is randomized when watchdog is created, so it stays the same every time for one reboot. Note that the stagger is effectively only applied to the first reboot attempt; next attempts (e.g. next day at designated time) will not have a stagger. If more staggering behavior is desired, see XXX below. """ uptime = self.master.get_uptime() reboot_required = False now = datetime.datetime.utcnow() _log.debug('_periodic_reboot_check: uptime=%s' % uptime) # Check whether UI configuration requires a reboot (time & day match) try: reboot_limit = uihelpers.compute_periodic_reboot_time() reboot_limit += self._periodic_reboot_stagger_delta _log.debug('_periodic_reboot_check: reboot limit after stagger: %s' % reboot_limit) lm = licensemanager.LicenseMonitor() count, limit, limit_leeway = lm.count_normal_users() # time to periodic reboot (negative = past due) diff = reboot_limit - now _log.debug('_periodic_reboot_check: periodic reboot diff (limit-now, time to reboot): %s' % str(diff)) if diff <= datetime.timedelta(0, 0, 0): overdue = -diff _log.debug('_periodic_reboot_check: periodic reboot is %s overdue' % overdue) if count > 0: # there are clients (without license restrictions!), give 24h leeway if overdue < datetime.timedelta(1, 0, 0): # XXX: hardcoded _log.info('_periodic_reboot_check: want to do a periodic reboot, but there are active clients (%d), skipping' % count) else: _log.warning('_periodic_reboot_check: want to a periodic reboot, active clients (%d), but leeway over, rebooting anyway' % count) reboot_required = True else: _log.warning('_periodic_reboot_check: want to do a periodic reboot, and no active clients, ok') reboot_required = True except: _log.exception('_periodic_reboot_check: failed when checking for periodic reboot policy') # If not within periodic reboot time window (e.g. 02:00-03:00 local time), # skip periodic reboot. if reboot_required: # XXX: better stagger check could be applied here (checked every day) if not uihelpers.check_periodic_reboot_time_window(now): _log.warning('_periodic_reboot_check: want to do a periodic reboot, but not within periodic reboot time window') reboot_required = False # If more than a maximum number of days, reboot, despite configuration if uptime > constants.PERIODIC_REBOOT_MAX_UPTIME: _log.warning('_periodic_reboot_check: uptime is too large (%s), requires reboot' % uptime) reboot_required = True elif uptime < 0.0: # negative uptime: ignore it for now; if the diff is great, we'll get a periodic reboot anyway later _log.warning('_periodic_reboot_check: uptime is negative (%s), ignoring' % uptime) # Sanity check: if we want to reboot, check that enough watchdog rounds # have elapsed (roughly 24h). 
if reboot_required: rounds = self.get_watchdog_rounds() if rounds < constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS: _log.warning('_periodic_reboot_check: want to do periodic reboot, but watchdog rounds too low (%d < %d)' % (rounds, constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS)) reboot_required = False # Take action if necessary if reboot_required: if self._periodic_reboot_started: _log.info('_periodic_reboot_check: reboot required but periodic reboot already in progress, no action needed') else: try: _log.warning('_periodic_reboot_check: periodic reboot started') self._periodic_reboot_started = True self._periodic_reboot_show_warning() helpers.increment_global_status_counter(ns.periodicReboots) helpers.db_flush() except: _log.exception('failed to increment counter') try: helpers.write_datetime_marker_file(constants.LAST_AUTOMATIC_REBOOT_MARKER_FILE) except: _log.exception('failed to write last automatic reboot marker file') uihelpers.ui_reboot(constants.WEBUI_PRODUCT_PERIODIC_REBOOT_MESSAGE, skip_update=False, force_update=False, force_fsck=True, delay=120.0) # XXX: constants
def _periodic_reboot_check(self): """Check for periodic reboot and take action if necessary. Tries to be clever and avoid reboot if connections are up. Uptime estimation is annoying: if time is changed on this reboot, the estimate may be grossly wrong. To ensure that we don't reboot on the first boot (when time is synchronized) uncontrollably, this function also checks that enough watchdog rounds have been run to warrant a reboot. The underlying assumption is that web UI has been running continuously, which is currently OK because we don't restart it ever (cron watchdog will just reboot if UI is down). Staggering of reboot is added by randomizing the "minute" of the reboot in the range [0,45] (not [0,60] for leeway). The "minute" is randomized when watchdog is created, so it stays the same every time for one reboot. Note that the stagger is effectively only applied to the first reboot attempt; next attempts (e.g. next day at designated time) will not have a stagger. If more staggering behavior is desired, see XXX below. """ uptime = self.master.get_uptime() reboot_required = False now = datetime.datetime.utcnow() _log.debug('_periodic_reboot_check: uptime=%s' % uptime) # Check whether UI configuration requires a reboot (time & day match) try: reboot_limit = uihelpers.compute_periodic_reboot_time() reboot_limit += self._periodic_reboot_stagger_delta _log.debug( '_periodic_reboot_check: reboot limit after stagger: %s' % reboot_limit) lm = licensemanager.LicenseMonitor() count, limit, limit_leeway = lm.count_normal_users() # time to periodic reboot (negative = past due) diff = reboot_limit - now _log.debug( '_periodic_reboot_check: periodic reboot diff (limit-now, time to reboot): %s' % str(diff)) if diff <= datetime.timedelta(0, 0, 0): overdue = -diff _log.debug( '_periodic_reboot_check: periodic reboot is %s overdue' % overdue) if count > 0: # there are clients (without license restrictions!), give 24h leeway if overdue < datetime.timedelta(1, 0, 0): # XXX: hardcoded _log.info( '_periodic_reboot_check: want to do a periodic reboot, but there are active clients (%d), skipping' % count) else: _log.warning( '_periodic_reboot_check: want to a periodic reboot, active clients (%d), but leeway over, rebooting anyway' % count) reboot_required = True else: _log.warning( '_periodic_reboot_check: want to do a periodic reboot, and no active clients, ok' ) reboot_required = True except: _log.exception( '_periodic_reboot_check: failed when checking for periodic reboot policy' ) # If not within periodic reboot time window (e.g. 02:00-03:00 local time), # skip periodic reboot. if reboot_required: # XXX: better stagger check could be applied here (checked every day) if not uihelpers.check_periodic_reboot_time_window(now): _log.warning( '_periodic_reboot_check: want to do a periodic reboot, but not within periodic reboot time window' ) reboot_required = False # If more than a maximum number of days, reboot, despite configuration if uptime > constants.PERIODIC_REBOOT_MAX_UPTIME: _log.warning( '_periodic_reboot_check: uptime is too large (%s), requires reboot' % uptime) reboot_required = True elif uptime < 0.0: # negative uptime: ignore it for now; if the diff is great, we'll get a periodic reboot anyway later _log.warning( '_periodic_reboot_check: uptime is negative (%s), ignoring' % uptime) # Sanity check: if we want to reboot, check that enough watchdog rounds # have elapsed (roughly 24h). 
if reboot_required: rounds = self.get_watchdog_rounds() if rounds < constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS: _log.warning( '_periodic_reboot_check: want to do periodic reboot, but watchdog rounds too low (%d < %d)' % (rounds, constants.PERIODIC_REBOOT_MINIMUM_WATCHDOG_ROUNDS)) reboot_required = False # Take action if necessary if reboot_required: if self._periodic_reboot_started: _log.info( '_periodic_reboot_check: reboot required but periodic reboot already in progress, no action needed' ) else: try: _log.warning( '_periodic_reboot_check: periodic reboot started') self._periodic_reboot_started = True self._periodic_reboot_show_warning() helpers.increment_global_status_counter(ns.periodicReboots) helpers.db_flush() except: _log.exception('failed to increment counter') try: helpers.write_datetime_marker_file( constants.LAST_AUTOMATIC_REBOOT_MARKER_FILE) except: _log.exception( 'failed to write last automatic reboot marker file') uihelpers.ui_reboot( constants.WEBUI_PRODUCT_PERIODIC_REBOOT_MESSAGE, skip_update=False, force_update=False, force_fsck=True, delay=120.0) # XXX: constants
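# --- Illustrative sketch (not part of the original module) -------------------
# The docstring above says the stagger "minute" is randomized in [0,45] once,
# when the watchdog is created, and stored in _periodic_reboot_stagger_delta.
# The helper below is a hypothetical example of how that delta could be
# computed; the function name and initialization site are assumptions, only
# the [0,45] range and the timedelta usage come from the code above.
import random

def _example_periodic_reboot_stagger_delta():
    # one random "minute" in [0,45]; the remaining ~15 minutes leave leeway
    # inside a one-hour reboot window
    return datetime.timedelta(minutes=random.randint(0, 45))
# -----------------------------------------------------------------------------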
def _update_snmp():
    """Update SNMP data."""
    from codebay.l2tpserver import licensemanager
    from codebay.l2tpserver import helpers
    from codebay.l2tpserver.webui import uihelpers

    now = datetime.datetime.utcnow()
    st = helpers.get_status()
    global_st = helpers.get_global_status()
    license_info = helpers.get_license_info()

    def _timeticks(td):
        return int(helpers.timedelta_to_seconds(td) * 100.0)

    def _timestamp(dt):
        return datatypes.encode_datetime_to_iso8601_subset(dt)

    def _get_management_conn():
        # XXX: not the best place for this
        if global_st.hasS(ns.managementServerConnection):
            if global_st.getS(ns.managementServerConnection, rdf.Boolean):
                return 1
        return 0

    vals = {}

    lm = licensemanager.LicenseMonitor()
    usr_count, usr_limit, usr_limit_leeway, s2s_count, s2s_limit, s2s_limit_leeway = None, None, None, None, None, None
    try:
        usr_count, usr_limit, usr_limit_leeway, s2s_count, s2s_limit, s2s_limit_leeway = lm.count_both_users()
    except:
        _log.exception('cannot get ppp counts for snmp')

    # XXX: this sharing of status code is quite unclean; see
    # uihelpers.get_status_and_substatus() for suggestions
    health_errors = 0
    try:
        status_class, status_text, substatus_class, substatus_text, status_ok = uihelpers.get_status_and_substatus()
        if status_ok:
            health_errors = 0
        else:
            health_errors = 1
    except:
        _log.exception('cannot determine health errors')

    for k, l in [ ('vpneaseHealthCheckErrors', lambda: health_errors),
                  ('vpneaseUserCount', lambda: usr_count),
                  ('vpneaseSiteToSiteCount', lambda: s2s_count),
                  ('vpneaseLastMaintenanceReboot', lambda: _timestamp(helpers.read_datetime_marker_file(constants.LAST_AUTOMATIC_REBOOT_MARKER_FILE))),
                  ('vpneaseNextMaintenanceReboot', lambda: _timestamp(uihelpers.compute_periodic_reboot_time())),
                  ('vpneaseLastSoftwareUpdate', lambda: _timestamp(helpers.read_datetime_marker_file(constants.LAST_SUCCESSFUL_UPDATE_MARKER_FILE))),
                  ('vpneaseSoftwareVersion', lambda: helpers.get_product_version(cache=True, filecache=True)),
                  ('vpneaseCpuUsage', lambda: int(global_st.getS(ns.cpuUsage, rdf.Float))),
                  ('vpneaseMemoryUsage', lambda: int(global_st.getS(ns.memoryUsage, rdf.Float))),
                  ('vpneaseVirtualMemoryUsage', lambda: int(global_st.getS(ns.swapUsage, rdf.Float))),
                  ('vpneaseServiceUptime', lambda: _timeticks(now - st.getS(ns.startTime, rdf.Datetime))),
                  ('vpneaseHostUptime', lambda: _timeticks(datetime.timedelta(0, helpers.get_uptime(), 0))),
                  ('vpneasePublicAddress', lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getAddress().toString()),
                  ('vpneasePublicSubnet', lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getMask().toString()),
                  ('vpneasePublicMac', lambda: st.getS(ns.publicInterface, rdf.Type(ns.NetworkInterface)).getS(ns.macAddress, rdf.String)),
                  ('vpneasePrivateAddress', lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getAddress().toString()),
                  ('vpneasePrivateSubnet', lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.ipAddress, rdf.IPv4AddressSubnet).getMask().toString()),
                  ('vpneasePrivateMac', lambda: st.getS(ns.privateInterface, rdf.Type(ns.NetworkInterface)).getS(ns.macAddress, rdf.String)),
                  ('vpneaseLicenseKey', lambda: license_info.getS(ns_ui.licenseKey, rdf.String)),
                  ('vpneaseLicenseString', lambda: license_info.getS(ns_ui.licenseString, rdf.String)),
                  ('vpneaseLicenseUserLimit', lambda: usr_limit),
                  ('vpneaseLicenseSiteToSiteLimit', lambda: s2s_limit),
                  ('vpneaseMaintenanceReboots', lambda: global_st.getS(ns.periodicReboots, rdf.Integer)),
                  ('vpneaseWatchdogReboots', lambda: global_st.getS(ns.watchdogReboots, rdf.Integer)),
                  ('vpneaseLicenseServerConnection', _get_management_conn),
                  ]:
        try:
            val = l()
            if val is not None:
                vals[k] = val
        except:
            # these are expected in several cases, so don't spew too much log about them
            # XXX: it would be better if the checkers would figure these out for themselves
            # (when a value is expected and when not)
            _log.info('failed to get snmp value for key %s' % k)
            #_log.exception('failed to get snmp value for key %s' % k)

    keys = vals.keys()
    keys.sort()
    res = ''
    for k in keys:
        res += '%s=%s\n' % (k, vals[k])

    # to ASCII, escaping any non-ASCII chars with XML escapes
    res = res.encode('US-ASCII', 'xmlcharrefreplace')

    f = None
    try:
        f = open(constants.SNMP_DATA_FILE, 'wb')
        f.write(res)
    finally:
        if f:
            f.close()
            f = None
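# --- Illustrative sketch (not part of the original module) -------------------
# _update_snmp() writes one "key=value" line per SNMP variable to
# constants.SNMP_DATA_FILE, ASCII-encoded with XML character references for
# any non-ASCII characters.  The hypothetical reader below shows how such a
# file could be parsed back into a dict; the function name is an assumption,
# only the file format follows from the code above.
def _example_read_snmp_data(path):
    vals = {}
    f = open(path, 'rb')
    try:
        for line in f.read().splitlines():
            if '=' not in line:
                continue
            k, v = line.split('=', 1)
            vals[k] = v
    finally:
        f.close()
    return vals
# -----------------------------------------------------------------------------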