class HealthCheckShared(object):
    """
    Constants for the HealthcheckCLI
    """
    ADDON_TYPE = 'healthcheck'
    CACHE_KEY = 'ovs_healthcheck_discover_method'
    logger = Logger("healthcheck-ovs_clirunner")

    CMD_FOLDER = os.path.join(os.path.dirname(__file__), 'suites')  # Folder to query for commands

    CONTEXT_SETTINGS_KEY = '/ovs/healthcheck/default_arguments'
    _context_settings = {}  # Cache

    @staticmethod
    def get_healthcheck_results(result_handler):
        # type: (HCResults) -> dict
        """
        Output the Healthcheck results
        :param result_handler: HCResults instance
        :type result_handler: HCResults
        :return: dict with information
        :rtype: dict
        """
        recap_executer = 'Health Check'
        result = result_handler.get_results()
        result_handler.info("Recap of {0}!".format(recap_executer))
        result_handler.info("======================")
        recount = []  # Order matters
        for severity in ['SUCCESS', 'FAILED', 'SKIPPED', 'WARNING', 'EXCEPTION']:
            recount.append((severity, result_handler.counter[severity]))
        result_handler.info(' '.join('{0}={1}'.format(s, v) for s, v in recount))
        # Returns a dict with minimal and detailed information
        return {'result': result, 'recap': dict(recount)}

    @classmethod
    def get_default_arguments(cls):
        if not cls._context_settings:
            cls._context_settings = Configuration.get(cls.CONTEXT_SETTINGS_KEY, default={})
        return cls._context_settings
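
# Example (illustrative sketch, not part of the original module): consuming the
# recap dict returned by get_healthcheck_results(). The exit-code convention
# shown here is an assumption for illustration, not an OVS API.
def _example_recap_usage(result_handler):  # pragma: no cover
    import sys
    results = HealthCheckShared.get_healthcheck_results(result_handler)
    # e.g. {'SUCCESS': 12, 'FAILED': 0, 'SKIPPED': 3, 'WARNING': 1, 'EXCEPTION': 0}
    recap = results['recap']
    if recap['FAILED'] > 0 or recap['EXCEPTION'] > 0:
        sys.exit(1)
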
class VolumedriverHealthCheck(object):
    """
    A healthcheck for the volumedriver components
    """
    MODULE = 'volumedriver'
    LOCAL_ID = System.get_my_machine_id()
    LOCAL_SR = System.get_my_storagerouter()
    VDISK_CHECK_SIZE = 1024 ** 3  # 1GB in bytes
    VDISK_HALTED_STATES = DataObject.enumerator('Halted_status', ['HALTED', 'FENCED'])
    VDISK_TIMEOUT_BEFORE_DELETE = 0.5
    # Only used to check the status of a fenced volume. This should not be used to link a status to a non-halted/fenced volume
    FENCED_HALTED_STATUS_MAP = {'max_redirect': {'status': VDisk.STATUSES.NON_RUNNING,
                                                 'severity': 'failure',
                                                 'halted': ('These volumes are not running: {0}', ErrorCodes.volume_max_redirect),
                                                 'fenced': ('These volumes are fenced but not running on another node: {0}', ErrorCodes.volume_fenced_max_redirect)},
                                'halted': {'status': VDisk.STATUSES.HALTED,
                                           'severity': 'failure',
                                           'halted': ('These volumes are halted: {0}', ErrorCodes.volume_halted),
                                           'fenced': ('These volumes are fenced but halted on another node: {0}', ErrorCodes.volume_fenced_halted)},
                                'connection_fail': {'status': 'UNKNOWN',
                                                    'severity': 'failure',
                                                    'halted': ('These volumes experienced a connectivity/timeout problem: {0}', ErrorCodes.voldrv_connection_problem),
                                                    'fenced': ('These volumes are fenced but experienced a connectivity/timeout problem on another node: {0}', ErrorCodes.voldrv_connection_problem)},
                                'ok': {'status': VDisk.STATUSES.RUNNING,
                                       'severity': 'failure',
                                       'halted': ('These volumes are running: {0}', ErrorCodes.volume_ok),
                                       'fenced': ('These volumes are fenced but running on another node: {0}', ErrorCodes.volume_fenced_ok)},
                                'not_found': {'status': 'NOT_FOUND',
                                              'severity': 'warning',
                                              'halted': ('These volumes could not be queried for information: {0}', ErrorCodes.volume_not_found),
                                              'fenced': ('These volumes are fenced but could not be queried for information on another node: {0}', ErrorCodes.volume_fenced_not_found)}}

    logger = Logger('healthcheck-ovs_volumedriver')

    @staticmethod
    @expose_to_cli(MODULE, 'dtl-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that the DTL of all VDisks is properly running',
                   short_help='Test if DTL is properly running')
    def check_dtl(result_handler):
        """
        Checks the DTL for all vdisks on the local node
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        # Fetch vdisks hosted on this machine
        local_sr = System.get_my_storagerouter()
        if len(local_sr.vdisks_guids) == 0:
            return result_handler.skip('No VDisks present in cluster.')
        for vdisk_guid in local_sr.vdisks_guids:
            vdisk = VDisk(vdisk_guid)
            vdisk.invalidate_dynamics(['dtl_status', 'info'])
            if vdisk.dtl_status in ('ok_standalone', 'disabled'):
                result_handler.success('VDisk {0}\'s DTL is disabled'.format(vdisk.name), code=ErrorCodes.volume_dtl_standalone)
            elif vdisk.dtl_status == 'ok_sync':
                result_handler.success('VDisk {0}\'s DTL is enabled and running.'.format(vdisk.name), code=ErrorCodes.volume_dtl_ok)
            elif vdisk.dtl_status == 'degraded':
                result_handler.warning('VDisk {0}\'s DTL is degraded.'.format(vdisk.name), code=ErrorCodes.volume_dtl_degraded)
            elif vdisk.dtl_status == 'checkup_required':
                result_handler.warning('VDisk {0}\'s DTL should be configured.'.format(vdisk.name), code=ErrorCodes.volume_dtl_checkup_required)
            elif vdisk.dtl_status == 'catch_up':
                result_handler.warning('VDisk {0}\'s DTL is enabled but still syncing.'.format(vdisk.name), code=ErrorCodes.volume_dtl_catch_up)
            else:
                result_handler.warning('VDisk {0}\'s DTL has an unknown status: {1}.'.format(vdisk.name, vdisk.dtl_status), code=ErrorCodes.volume_dtl_unknown)

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver(vdisk_name, storagedriver_guid, logger, vdisk_size=VDISK_CHECK_SIZE):
        """
        Checks if the volumedriver can create a new vdisk
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param storagedriver_guid: guid of a storagedriver
        :type storagedriver_guid: str
        :param logger: logger instance
        :type logger: ovs.extensions.healthcheck.result.HCResults
        :param vdisk_size: size of the volume in bytes (e.g. 10737418240 is 10GB in bytes)
        :type vdisk_size: int
        :return: True if the creation succeeded
        :rtype: bool
        """
        try:
            VDiskController.create_new(vdisk_name, vdisk_size, storagedriver_guid)
        except FileExistsException:
            # Can be ignored until fixed in framework
            # https://github.com/openvstorage/framework/issues/1247
            return True
        except Exception as ex:
            logger.failure('Creation of the vdisk failed. Got {0}'.format(str(ex)))
            return False
        return True

    @staticmethod
    @timeout_decorator.timeout(30)
    def _check_volumedriver_remove(vpool_name, vdisk_name, present=True):
        """
        Removes a vdisk from a vpool
        :param vdisk_name: name of a vdisk (e.g. test.raw)
        :type vdisk_name: str
        :param vpool_name: name of a vpool
        :type vpool_name: str
        :param present: should the disk be present?
        :type present: bool
        :return: True if the disk is no longer present
        :rtype: bool
        """
        try:
            vdisk = VDiskHelper.get_vdisk_by_name(vdisk_name=vdisk_name, vpool_name=vpool_name)
            VDiskController.delete(vdisk.guid)
            return True
        except VDiskNotFoundError:
            # Not found; if it should be present, re-raise the exception
            if present:
                raise
            else:
                return True

    @staticmethod
    # @expose_to_cli(MODULE, 'volumedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that the Volumedrivers are responding to events',
    #                short_help='Test if Volumedrivers are responding to events')
    def check_volumedrivers(result_handler):
        """
        Checks if the volumedrivers work on a local machine (compatible with multiple vPools)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        result_handler.info('Checking volumedrivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        for vp in vpools:
            name = 'ovs-healthcheck-test-{0}.raw'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                # Resolve the local storagedriver for this vPool
                storagedriver_guid = next((storagedriver.guid for storagedriver in vp.storagedrivers
                                           if storagedriver.storagedriver_id == vp.name + VolumedriverHealthCheck.LOCAL_ID))
                # Create a new vdisk
                volume = VolumedriverHealthCheck._check_volumedriver(name, storagedriver_guid, result_handler)
                if volume is True:
                    # Delete the recently created vdisk
                    try:
                        VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name)
                    except Exception as ex:
                        raise RuntimeError('Could not delete the created volume. Got {0}'.format(str(ex)))
                    # Working at this point
                    result_handler.success('Volumedriver of vPool {0} is working fine!'.format(vp.name))
                else:
                    # Not working
                    result_handler.failure('Something went wrong during vdisk creation on vpool {0}.'.format(vp.name))
            except TimeoutError:
                # Timeout occurred, the action took too long
                result_handler.warning('Volumedriver of vPool {0} seems to time out.'.format(vp.name))
            except IOError as ex:
                # Can be an input/output error by the volumedriver
                result_handler.failure('Volumedriver of vPool {0} seems to have IO problems. Got `{1}` while executing.'.format(vp.name, ex.message))
            except RuntimeError as ex:
                result_handler.failure('Volumedriver of vPool {0} seems to have problems. Got `{1}` while executing.'.format(vp.name, ex))
            except VDiskNotFoundError:
                result_handler.warning('Volume on vPool {0} was not found, please retry.'.format(vp.name))
            except Exception as ex:
                result_handler.failure('Uncaught exception for Volumedriver of vPool {0}. Got {1} while executing.'.format(vp.name, ex))
            finally:
                # Attempt to delete the created vdisk
                try:
                    VolumedriverHealthCheck._check_volumedriver_remove(vpool_name=vp.name, vdisk_name=name, present=False)
                except:
                    pass

    @classmethod
    def _is_volumedriver_timeout(cls, exception):
        """
        Validates whether a certain exception is a timeout exception (RuntimeError, prior to NodeNotReachable in voldriver 6.17)
        :param exception: Exception object to check
        :return: True if it is a timeout, False if it is not
        :rtype: bool
        """
        return isinstance(exception, ClusterNotReachableException) or (isinstance(exception, RuntimeError) and 'failed to send XMLRPC request' in str(exception))

    @classmethod
    @expose_to_cli(MODULE, 'halted-volumes-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that there are no halted/fenced volumes within the cluster',
                   short_help='Test if there are no halted/fenced volumes')
    def check_for_halted_volumes(cls, result_handler):
        """
        Checks for halted volumes on a single or multiple vPools
        This will only check the volume states on the current node. If any other volumedriver is down, only the HA'd volumes
        will pop up, as they could appear halted here (should be verified by the volumedriver team)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        vpools = VPoolList.get_vpools()
        local_sr = System.get_my_storagerouter()
        if len(vpools) == 0:
            result_handler.skip('No vPools found!', code=ErrorCodes.vpools_none)
            return
        for vpool in vpools:
            log_start = 'Halted volumes test vPool {0}'.format(vpool.name)
            if vpool.guid not in local_sr.vpools_guids:
                result_handler.skip('{0} - Skipping vPool {1} because it is not living here.'.format(log_start, vpool.name),
                                    code=ErrorCodes.vpool_not_local, add_to_result=False)
                continue
            result_handler.info('{0} - Retrieving all information'.format(log_start), add_to_result=False)
            storagedriver = None
            for std in vpool.storagedrivers:
                if std.storagerouter_guid == local_sr.guid:
                    storagedriver = std
                    break
            if storagedriver is None:
                result_handler.failure('{0} - Could not associate a StorageDriver with this StorageRouter'.format(log_start),
                                       code=ErrorCodes.std_no_str)
                continue
            volume_fenced_states = dict((key, []) for key in cls.FENCED_HALTED_STATUS_MAP.keys())
            volume_lists = {cls.VDISK_HALTED_STATES.HALTED: [], cls.VDISK_HALTED_STATES.FENCED: []}
            volume_states = {cls.VDISK_HALTED_STATES.HALTED: {cls.VDISK_HALTED_STATES.HALTED: volume_lists[cls.VDISK_HALTED_STATES.HALTED]},
                             cls.VDISK_HALTED_STATES.FENCED: volume_fenced_states}  # Fewer loops to write for outputting
            result_handler.info('{0} - Scanning for halted volumes'.format(log_start), add_to_result=False)
            try:
                voldrv_client = vpool.storagedriver_client
                objectregistry_client = vpool.objectregistry_client
            except Exception:
                cls.logger.exception('{0} - Unable to instantiate the required clients'.format(log_start))
                result_handler.exception('{0} - Unable to load the Volumedriver clients'.format(log_start),
                                         code=ErrorCodes.voldr_unknown_problem)
                continue
            try:
                # List all halted volumes with the volumedriver client as it detects stolen volumes too (fenced instances)
                volumes = voldrv_client.list_halted_volumes(str(storagedriver.storagedriver_id))
            except Exception as ex:
                cls.logger.exception('{0} - Exception occurred when listing volumes'.format(log_start))
                if cls._is_volumedriver_timeout(ex) is False:
                    # Unhandled exception at this point
                    result_handler.exception('{0} - Unable to list the volumes due to an unidentified problem. Please check the logging'.format(log_start),
                                             code=ErrorCodes.voldr_unknown_problem)
                else:
                    result_handler.failure('{0} - Could not list the volumes due to a connection problem.'.format(log_start),
                                           code=ErrorCodes.voldrv_connection_problem)
                continue
            # Retrieve the parent of the current volume. If this id is not identical to the one we fetched for, the volume is fenced
            # The object registry goes to Arakoon
            # Capture any possible exception that could occur to provide a clearer view of what went wrong
            for volume in volumes:
                try:
                    registry_entry = objectregistry_client.find(volume)
                    if registry_entry.node_id() == storagedriver.storagedriver_id:
                        volume_lists[cls.VDISK_HALTED_STATES.HALTED].append(volume)
                    else:
                        # Fenced
                        volume_lists[cls.VDISK_HALTED_STATES.FENCED].append(volume)
                except Exception:
                    msg = '{0} - Unable to consult the object registry client for volume \'{1}\''.format(log_start, volume)
                    cls.logger.exception(msg)
                    result_handler.exception(msg, code=ErrorCodes.voldr_unknown_problem)
            # Include the fenced - OTHER state combination
            for volume in volume_lists[cls.VDISK_HALTED_STATES.FENCED]:
                try:
                    _, state = cls._get_volume_issue(voldrv_client, volume, log_start)
                    volume_fenced_states[state].append(volume)
                except Exception:
                    # Only unhandled exceptions at this point
                    result_handler.exception('{0} - Unable to retrieve the volume info for volume {1} due to an unidentified problem. Please check the logging'.format(log_start, volume),
                                             code=ErrorCodes.voldr_unknown_problem)
            for halted_state, volume_state_info in volume_states.iteritems():
                for state, volumes in volume_state_info.iteritems():
                    if len(volumes) == 0:
                        continue  # Skip OK/empty lists
                    map_value = cls.FENCED_HALTED_STATUS_MAP[state.lower()]
                    log_func = getattr(result_handler, map_value['severity'])
                    message, code = map_value[halted_state.lower()]
                    log_func('{0} - {1}'.format(log_start, message.format(', '.join(volumes))), code=code)
            # Call success in case nothing is wrong
            if all(len(l) == 0 for l in volume_lists.values()):
                result_handler.success('{0} - No volumes found in halted/fenced state'.format(log_start))

    @classmethod
    def _get_volume_issue(cls, voldrv_client, volume_id, log_start):
        """
        Maps all possible exceptions to a state. These states can be mapped to a status using the FENCED_HALTED_STATUS_MAP
        because the volumedriver does not return a state itself
        :param voldrv_client: Storagedriver client
        :param volume_id: Id of the volume
        :raises: The unhandled exception when such an exception could occur (we try to identify all problems but one could slip past us)
        :return: The volume_id and state
        :rtype: tuple(str, str)
        """
        state = 'ok'
        try:
            # Check if information can be retrieved about the volume
            vol_info = voldrv_client.info_volume(volume_id, req_timeout_secs=5)
            if vol_info.halted is True:
                state = 'halted'
        except Exception as ex:
            cls.logger.exception('{0} - Exception occurred when fetching the info for volume \'{1}\''.format(log_start, volume_id))
            if isinstance(ex, ObjectNotFoundException):
                # Ignore ovsdb invalid entries as model consistency will handle it
                state = 'not_found'
            elif isinstance(ex, MaxRedirectsExceededException):
                # This means the volume is not halted but detached or unreachable for the volumedriver
                state = 'max_redirect'
            # @todo replace RuntimeError with NodeNotReachableException
            elif any(isinstance(ex, exception) for exception in [ClusterNotReachableException, RuntimeError]):
                if cls._is_volumedriver_timeout(ex) is False:
                    # Unhandled exception at this point
                    raise
                # Timeout / connection problems
                state = 'connection_fail'
            else:
                # Something to be looked at
                raise
        return volume_id, state

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver(vp_name, test_name):
        """
        Checks if a filedriver `touch` works on a vpool
        Always check if the file exists after performing this method
        :param vp_name: name of the vpool
        :type vp_name: str
        :param test_name: name of the test file (e.g. `ovs-healthcheck-LOCAL_ID`)
        :type test_name: str
        :return: True if it succeeded, False if it failed
        :rtype: bool
        """
        return subprocess.check_output('touch /mnt/{0}/{1}.xml'.format(vp_name, test_name), stderr=subprocess.STDOUT, shell=True)

    @staticmethod
    @timeout_decorator.timeout(5)
    def _check_filedriver_remove(vp_name):
        """
        Checks if a filedriver `remove` works on a vpool
        Always check if the file exists after performing this method
        :param vp_name: name of the vpool
        :type vp_name: str
        :return: True if it succeeded, False if it failed
        :rtype: bool
        """
        subprocess.check_output('rm -f /mnt/{0}/ovs-healthcheck-test-*.xml'.format(vp_name), stderr=subprocess.STDOUT, shell=True)
        return not os.path.exists('/mnt/{0}/ovs-healthcheck-test-*.xml'.format(vp_name))

    @staticmethod
    # @expose_to_cli(MODULE, 'filedrivers-test', HealthCheckCLI.ADDON_TYPE,
    #                help='Verify that all Volumedrivers are accessible through FUSE',
    #                short_help='Test if the FUSE layer is responding')
    # @todo replace the fuse test with an edge test
    def check_filedrivers(result_handler):
        """
        Checks if the file drivers work on a local machine (compatible with multiple vPools)
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking file drivers.', add_to_result=False)
        vpools = VPoolList.get_vpools()
        # Perform tests
        if len(vpools) == 0:
            result_handler.skip('No vPools found!')
            return
        for vp in vpools:
            name = 'ovs-healthcheck-test-{0}'.format(VolumedriverHealthCheck.LOCAL_ID)
            if vp.guid not in VolumedriverHealthCheck.LOCAL_SR.vpools_guids:
                result_handler.skip('Skipping vPool {0} because it is not living here.'.format(vp.name))
                continue
            try:
                VolumedriverHealthCheck._check_filedriver(vp.name, name)
                if os.path.exists('/mnt/{0}/{1}.xml'.format(vp.name, name)):
                    # Working
                    VolumedriverHealthCheck._check_filedriver_remove(vp.name)
                    result_handler.success('Filedriver for vPool {0} is working fine!'.format(vp.name))
                else:
                    # Not working
                    result_handler.failure('Filedriver for vPool {0} seems to have problems!'.format(vp.name))
            except TimeoutError:
                # Timeout occurred, the action took too long
                result_handler.warning('Filedriver of vPool {0} seems to have `timeout` problems'.format(vp.name))
            except subprocess.CalledProcessError:
                # Can be an input/output error by the filedriver
                result_handler.failure('Filedriver of vPool {0} seems to have `input/output` problems'.format(vp.name))

    @staticmethod
    @expose_to_cli(MODULE, 'volume-potential-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that the Volumedrivers have enough VDisk potential left',
                   short_help='Test if the Volumedrivers can create enough VDisks')
    @expose_to_cli.option('--critical-vol-number', '-c', type=int, default=25,
                          help='Minimum number of volumes left to create')
    def check_volume_potential(result_handler, critical_vol_number=25):
        """
        Checks all local storagedrivers of a volumedriver. Results in a success if enough volumes are available,
        a warning if the number of volumes is lower than the threshold value (critical_vol_number)
        and a failure if the number of volumes is 0
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param critical_vol_number: Minimal number of volumes that can be created before throwing a warning
        :type critical_vol_number: int
        """
        result_handler.info('Checking volume potential of storagedrivers')
        if not isinstance(critical_vol_number, int) or critical_vol_number < 0:
            raise ValueError('Critical volume number should be a positive integer')
        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                vol_potential = client.volume_potential(str(std.storagedriver_id))
                if vol_potential >= critical_vol_number:
                    log_level = 'success'
                elif critical_vol_number > vol_potential > 0:
                    log_level = 'warning'
                else:
                    log_level = 'failure'
                getattr(result_handler, log_level)('Volume potential of local storage driver: {0}: {1} (potential at: {2})'.format(std.storagedriver_id, log_level.upper(), vol_potential))
            except RuntimeError:
                result_handler.exception('Unable to retrieve configuration for storagedriver {0}'.format(std.storagedriver_id))

    @staticmethod
    @expose_to_cli(MODULE, 'sco-cache-mountpoint-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that sco-cache mountpoints are up and running',
                   short_help='Test if sco-cache mountpoints are up and running')
    def check_sco_cache_mountpoints(result_handler):
        """
        Iterates over the StorageDrivers of the local StorageRouter and checks all their sco cache mount points.
        Will log a warning if a sco cache mount point is in offline state
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        """
        result_handler.info('Checking sco cache mount points on all local storagedrivers')
        for std in VolumedriverHealthCheck.LOCAL_SR.storagedrivers:
            try:
                std_config = StorageDriverConfiguration(std.vpool_guid, std.storagedriver_id)
                client = LocalStorageRouterClient(std_config.remote_path)
                for std_info in client.sco_cache_mount_point_info(str(std.storagedriver_id)):
                    if std_info.offlined is True:
                        result_handler.warning('Mountpoint at location {0} of storagedriver {1} is in offline state'.format(std_info.path, std.storagedriver_id))
                    else:
                        result_handler.success('Mountpoint at location {0} of storagedriver {1} is in online state'.format(std_info.path, std.storagedriver_id))
            except RuntimeError:
                result_handler.exception('Unable to check sco cache mountpoint of storagedriver {0}'.format(std.storagedriver_id))
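
# Example (illustrative sketch, not part of the original module): how a
# (state, halted/fenced) pair resolves to a log call through FENCED_HALTED_STATUS_MAP,
# mirroring the reporting loop in check_for_halted_volumes. The volume ids are hypothetical.
def _example_fenced_halted_lookup(result_handler):  # pragma: no cover
    map_value = VolumedriverHealthCheck.FENCED_HALTED_STATUS_MAP['max_redirect']
    log_func = getattr(result_handler, map_value['severity'])  # result_handler.failure
    message, code = map_value['fenced']
    # Logs: 'These volumes are fenced but not running on another node: vol_1, vol_2'
    log_func(message.format(', '.join(['vol_1', 'vol_2'])), code=code)
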
def run(command, config=None, named_params=None, extra_params=None, client=None, debug=False, to_json=True):
    """
    Executes a command on ALBA
    When --to-json is NOT passed:
    * An error occurs --> exit code != 0
    * It worked --> exit code == 0

    When --to-json is passed:
    * An error occurs during verification of the passed parameters --> exit code != 0
    * An error occurs while executing the command --> exit code == 0 (error in json output)
    * It worked --> exit code == 0
    :param command: The command to execute, eg: 'list-namespaces'
    :type command: str
    :param config: The configuration location to be used, eg: 'arakoon://config/ovs/arakoon/ovsdb/config?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini'
    :type config: str
    :param named_params: Additional parameters to be given to the command, eg: {'long-id': ','.join(asd_ids)}
    :type named_params: dict
    :param extra_params: Additional parameters to be given to the command, eg: [name]
    :type extra_params: list
    :param client: A client on which to execute the command
    :type client: ovs.extensions.generic.sshclient.SSHClient
    :param debug: Log additional output
    :type debug: bool
    :param to_json: Parse the output as json
    :type to_json: bool
    :return: The output of the command
    :rtype: dict
    """
    if named_params is None:
        named_params = {}
    if extra_params is None:
        extra_params = []
    logger = Logger('healthcheck-alba_cli')
    if os.environ.get('RUNNING_UNITTESTS') == 'True':
        # For the unittests, all commands are passed to a mocked ALBA
        from ovs.extensions.plugins.tests.alba_mockups import VirtualAlbaBackend
        named_params.update({'config': config})
        named_params.update({'extra_params': extra_params})
        return getattr(VirtualAlbaBackend, command.replace('-', '_'))(**named_params)
    debug_log = []
    try:
        if to_json is True:
            extra_options = ["--to-json"]
        else:
            extra_options = []
        cmd_list = ['/usr/bin/alba', command] + extra_options
        if config is not None:
            cmd_list.append('--config={0}'.format(config))
        for key, value in named_params.iteritems():
            cmd_list.append('--{0}={1}'.format(key, value))
        cmd_list.extend(extra_params)
        cmd_string = ' '.join(cmd_list)
        debug_log.append('Command: {0}'.format(cmd_string))
        start = time.time()
        try:
            if client is None:
                try:
                    if not hasattr(select, 'poll'):
                        import subprocess
                        subprocess._has_poll = False  # Damn 'monkey patching'
                    channel = Popen(cmd_list, stdout=PIPE, stderr=PIPE, universal_newlines=True)
                except OSError as ose:
                    raise CalledProcessError(1, cmd_string, str(ose))
                output, stderr = channel.communicate()
                output = re.sub(r'[^\x00-\x7F]+', '', output)
                stderr_debug = 'stderr: {0}'.format(stderr)
                stdout_debug = 'stdout: {0}'.format(output)
                if debug is True:
                    logger.debug(stderr_debug)
                debug_log.append(stdout_debug)
                exit_code = channel.returncode
                if exit_code != 0:  # Raise the same error as check_output
                    raise CalledProcessError(exit_code, cmd_string, output)
            else:
                if debug is True:
                    output, stderr = client.run(cmd_list, debug=True)
                    debug_log.append('stderr: {0}'.format(stderr))
                else:
                    output = client.run(cmd_list, debug=False).strip()
                debug_log.append('stdout: {0}'.format(output))
            if to_json is True:
                output = json.loads(output)
            else:
                return output
            duration = time.time() - start
            if duration > 0.5:
                logger.warning('AlbaCLI call {0} took {1}s'.format(command, round(duration, 2)))
        except CalledProcessError as cpe:
            try:
                output = json.loads(cpe.output)
            except Exception:
                raise RuntimeError('Executing command {0} failed with output {1}'.format(cmd_string, cpe.output))
        if output['success'] is True:
            return output['result']
        raise RuntimeError(output['error']['message'])
    except Exception as ex:
        logger.exception('Error: {0}'.format(ex))
        # In case there is an exception, we always log the debug trail
        for debug_line in debug_log:
            logger.debug(debug_line)
        raise AlbaException(str(ex), command)
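
# Example (illustrative sketch, not part of the original module): a typical run()
# invocation. The config URL follows the pattern from the docstring above and is
# an assumption for illustration.
def _example_alba_run():  # pragma: no cover
    config = 'arakoon://config/ovs/arakoon/ovsdb/config?ini=%2Fopt%2FOpenvStorage%2Fconfig%2Farakoon_cacc.ini'
    # With to_json=True (the default) run() returns the parsed 'result' value of the
    # JSON envelope, or raises RuntimeError/AlbaException on failure.
    namespaces = run('list-namespaces', config=config)
    return namespaces
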
class CLIRunner(object):
    """
    Runs a method exposed by the expose_to_cli decorator.
    Serves as a base for all extensions using expose_to_cli
    """
    logger = Logger("healthcheck-ovs_clirunner")
    START_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
    CACHE_KEY = 'ovs_discover_method'
    _WILDCARD = 'X'

    def __init__(self):
        pass

    @classmethod
    def _get_methods(cls, module_name=_WILDCARD, method_name=_WILDCARD, addon_type=None):
        """
        Gets the methods matching the specified values
        :param module_name: module to which the methods belong
        :type module_name: str
        :param method_name: name of the method
        :type method_name: str
        :param addon_type: type of the method, distinguishes different addons
        :type addon_type: str
        :return: list of all found functions
        :rtype: list[function]
        """
        result = []
        discovered_data = cls._discover_methods()
        module_names = discovered_data.keys() if module_name == cls._WILDCARD else [module_name]
        for module_name in module_names:
            if module_name not in discovered_data:
                raise ModuleNotRecognizedException()
            for function_data in discovered_data[module_name]:
                if addon_type != function_data['addon_type'] or (method_name != cls._WILDCARD and method_name != function_data['method_name']):
                    continue
                mod = imp.load_source(function_data['module_name'], function_data['location'])
                cl = getattr(mod, function_data['class'])()
                result.append(getattr(cl, function_data['function']))
                if method_name == function_data['method_name']:
                    break
        return result

    @classmethod
    def extract_arguments(cls, *args):
        """
        Extracts arguments from the CLI
        Always expects a module_name and a method_name (the wildcard is X)
        :param args: arguments passed on by bash
        :return: tuple of module_name, method_name, bool whether --help was passed and the remaining arguments
        :rtype: tuple(str, str, bool, list)
        """
        args = list(args)
        help_requested = False
        # Always expect at least X X
        if len(args) < 2:
            raise ValueError('Expecting at least {0} {0} as arguments.'.format(cls._WILDCARD))
        if '--help' in args[0:3]:
            args.remove('--help')
            help_requested = True
        return args.pop(0), args.pop(0), help_requested, args

    @classmethod
    def run_method(cls, *args):
        """
        Executes the given method
        :return: None
        :rtype: NoneType
        """
        module_name, method_name, help_requested, args = cls.extract_arguments(*args)
        try:
            found_method_pointers = cls._get_methods(module_name, method_name)
        except ModuleNotRecognizedException:
            cls.print_help(cls._get_methods(), error_help=True)
            return
        if len(found_method_pointers) == 0:  # Module found but no methods -> print help
            cls.print_help(cls._get_methods(module_name), error_help=True)
            return
        if help_requested is True:
            cls.print_help(found_method_pointers)
            return
        try:
            for found_method in found_method_pointers:
                found_method(*args)
        except KeyboardInterrupt:
            cls.logger.warning('Caught keyboard interrupt. Output may be incomplete!')

    @classmethod
    def _discover_methods(cls):
        """
        Discovers all methods with the expose_to_cli decorator
        :return: dict that contains the required info based on module_name and method_name
        :rtype: dict
        """
        time_format = "%Y-%m-%d %H:%M:%S"
        version_id = 1
        start_path = cls.START_PATH
        client = VolatileFactory.get_client()
        cache_expiry_hours = 2  # Amount of hours before the cache expires

        def build_cache():
            """
            Builds a dict listing all discovered methods with @expose_to_cli
            :return: None
            :rtype: NoneType
            """
            # Build the cache
            # Executed from lib, want to go to extensions/healthcheck
            found_items = {'expires': (datetime.now() + timedelta(hours=cache_expiry_hours)).strftime(time_format)}
            path = start_path
            for root, dirnames, filenames in os.walk(path):
                for filename in filenames:
                    if not (filename.endswith('.py') and filename != '__init__.py'):
                        continue
                    name = filename.replace('.py', '')
                    file_path = os.path.join(root, filename)
                    # Import file
                    mod = imp.load_source(name, file_path)
                    for member in inspect.getmembers(mod):
                        if not (inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]):
                            continue
                        for submember in inspect.getmembers(member[1]):
                            if not hasattr(submember[1], 'expose_to_cli'):
                                continue
                            exposed_data = submember[1].expose_to_cli
                            method_module_name = exposed_data['module_name']
                            method_name = exposed_data['method_name']
                            method_addon_type = exposed_data['addon_type'] if 'addon_type' in exposed_data else None
                            if method_module_name not in found_items:
                                found_items[method_module_name] = []
                            # noinspection PyUnresolvedReferences
                            found_items[method_module_name].append({'method_name': method_name,
                                                                    'module_name': name,
                                                                    'function': submember[1].__name__,
                                                                    'class': member[1].__name__,
                                                                    'location': file_path,
                                                                    'version': version_id,
                                                                    'addon_type': method_addon_type})
            client.set(cls.CACHE_KEY, found_items)

        exposed_methods = client.get(cls.CACHE_KEY)
        # Re-use the cached discovery data while it has not expired yet
        if exposed_methods and datetime.strptime(exposed_methods['expires'], time_format) > datetime.now():
            del exposed_methods['expires']
            return exposed_methods
        build_cache()
        exposed_methods = client.get(cls.CACHE_KEY)
        del exposed_methods['expires']
        return exposed_methods

    @classmethod
    def print_help(cls, method_pointers=None, error_help=False):
        """
        Prints the possible methods that are exposed to the CLI
        :param method_pointers: list of method pointers
        :type method_pointers: list[function]
        :param error_help: print extra help in case wrong arguments were supplied
        :type error_help: bool
        :return: None
        :rtype: NoneType
        """
        if error_help is True:
            print 'Could not process your arguments.'
        if len(method_pointers) == 0:
            # Nothing found for the search terms
            print 'Found no methods matching your search terms.'
        elif len(method_pointers) == 1:
            # Found only one method -> the search term was module_name + method_name
            print method_pointers[0].__doc__
            return
        print 'Possible optional arguments are:'
        # Multiple entries found means only the module_name was supplied
        print 'ovs healthcheck {0} {0} -- will run all checks'.format(CLIRunner._WILDCARD)
        print 'ovs healthcheck MODULE {0} -- will run all checks for module'.format(CLIRunner._WILDCARD)
        # Sort based on module_name
        print_dict = {}
        for method_pointer in method_pointers:
            module_name = method_pointer.expose_to_cli['module_name']
            method_name = method_pointer.expose_to_cli['method_name']
            if module_name in print_dict:
                print_dict[module_name].append(method_name)
                continue
            print_dict[module_name] = [method_name]
        for module_name, method_names in print_dict.iteritems():
            for method_name in method_names:
                print "ovs healthcheck {0} {1}".format(module_name, method_name)
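
# Example (illustrative sketch, not part of the original module): the metadata
# contract _discover_methods() scans for. Any callable carrying an 'expose_to_cli'
# dict attribute is discovered, provided its file lives under START_PATH; the
# DummyHealthCheck class and all names below are hypothetical.
#
#     class DummyHealthCheck(object):
#         @staticmethod
#         def dummy_test(result_handler):
#             pass
#     DummyHealthCheck.dummy_test.expose_to_cli = {'module_name': 'dummy',
#                                                  'method_name': 'dummy-test',
#                                                  'addon_type': None}
#
# After the discovery cache is (re)built, 'ovs healthcheck dummy dummy-test'
# would resolve to DummyHealthCheck.dummy_test.
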
class HealthCheckCLIRunner(CLIRunner):
    """
    Healthcheck adaptation of CLIRunner
    Injects a result_handler instance with shared resources into every test to collect the results.
    """
    logger = Logger("healthcheck-healthcheck_clirunner")
    START_PATH = os.path.join(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)), 'healthcheck')
    ADDON_TYPE = 'healthcheck'

    @staticmethod
    def _keep_old_argument_style(args):
        """
        Fills up the missing arguments with wildcards
        :param args: all arguments passed by bash
        :return: the completed argument list
        """
        args = list(args)
        possible_args = ['--help', '--unattended', '--to-json']
        indexes = [args.index(arg) for arg in args if arg in possible_args]
        if len(indexes) > 0:
            if indexes[0] == 0:
                args.insert(0, HealthCheckCLIRunner._WILDCARD)
                args.insert(1, HealthCheckCLIRunner._WILDCARD)
            elif indexes[0] == 1:
                args.insert(1, HealthCheckCLIRunner._WILDCARD)
        else:
            if len(args) == 0:
                args.insert(0, HealthCheckCLIRunner._WILDCARD)
                args.insert(1, HealthCheckCLIRunner._WILDCARD)
            elif len(args) == 1:
                args.insert(1, HealthCheckCLIRunner._WILDCARD)
        return args

    @staticmethod
    def run_method(*args):
        """
        Executes the given method
        :return: results & recap
        :rtype: dict
        """
        args = HealthCheckCLIRunner._keep_old_argument_style(args)
        unattended = False
        to_json = False
        if '--unattended' in args:
            args.remove('--unattended')
            unattended = True
        if '--to-json' in args:
            args.remove('--to-json')
            to_json = True
        module_name, method_name, help_requested, args = HealthCheckCLIRunner.extract_arguments(*args)
        result_handler = HCResults(unattended, to_json)
        try:
            found_method_pointers = HealthCheckCLIRunner._get_methods(module_name, method_name, HealthCheckCLIRunner.ADDON_TYPE)
        except ModuleNotRecognizedException:
            HealthCheckCLIRunner.print_help(HealthCheckCLIRunner._get_methods(addon_type=HealthCheckCLIRunner.ADDON_TYPE), error_help=True)
            return
        if len(found_method_pointers) == 0:  # Module found but no methods -> print help
            HealthCheckCLIRunner.print_help(HealthCheckCLIRunner._get_methods(module_name=module_name, addon_type=HealthCheckCLIRunner.ADDON_TYPE), error_help=True)
            return
        if help_requested is True:
            HealthCheckCLIRunner.print_help(found_method_pointers)
            return
        local_settings = Helper.get_local_settings()
        for key, value in local_settings.iteritems():
            result_handler.info('{0}: {1}'.format(key.replace('_', ' ').title(), value))
        try:
            result_handler.info('Starting OpenvStorage Healthcheck version {0}'.format(Helper.get_healthcheck_version()))
            result_handler.info("======================")
            for found_method in found_method_pointers:
                test_name = '{0}-{1}'.format(found_method.expose_to_cli['module_name'], found_method.expose_to_cli['method_name'])
                try:
                    # Wrapped in node_check for the callback
                    node_check(found_method)(result_handler.HCResultCollector(result=result_handler, test_name=test_name))
                except KeyboardInterrupt:
                    raise
                except Exception as ex:
                    result_handler.exception('Unhandled exception caught when executing {0}. Got {1}'.format(found_method.__name__, str(ex)))
                    HealthCheckCLIRunner.logger.exception('Unhandled exception caught when executing {0}'.format(found_method.__name__))
            return HealthCheckCLIRunner.get_results(result_handler, module_name, method_name)
        except KeyboardInterrupt:
            HealthCheckCLIRunner.logger.warning('Caught keyboard interrupt. Output may be incomplete!')
            return HealthCheckCLIRunner.get_results(result_handler, module_name, method_name)

    @staticmethod
    def get_results(result_handler, module_name, method_name):
        """
        Gets the result of the Open vStorage healthcheck
        :param result_handler: result parser
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param module_name: module name specified with the cli
        :type module_name: str
        :param method_name: method name specified with the cli
        :type method_name: str
        :return: results & recap
        :rtype: dict
        """
        recap_executer = 'Health Check'
        if module_name != HealthCheckCLIRunner._WILDCARD:
            recap_executer = '{0} module {1}'.format(recap_executer, module_name)
        if method_name != HealthCheckCLIRunner._WILDCARD:
            recap_executer = '{0} test {1}'.format(recap_executer, method_name)
        result = result_handler.get_results()
        result_handler.info("Recap of {0}!".format(recap_executer))
        result_handler.info("======================")
        result_handler.info("SUCCESS={0} FAILED={1} SKIPPED={2} WARNING={3} EXCEPTION={4}".format(
            result_handler.counters['SUCCESS'], result_handler.counters['FAILED'], result_handler.counters['SKIPPED'],
            result_handler.counters['WARNING'], result_handler.counters['EXCEPTION']))
        # Returns a dict with minimal and detailed information
        return {'result': result,
                'recap': {'SUCCESS': result_handler.counters['SUCCESS'],
                          'FAILED': result_handler.counters['FAILED'],
                          'SKIPPED': result_handler.counters['SKIPPED'],
                          'WARNING': result_handler.counters['WARNING'],
                          'EXCEPTION': result_handler.counters['EXCEPTION']}}
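
# Example (illustrative sketch, not part of the original module): how
# _keep_old_argument_style() pads legacy invocations with wildcards before
# extract_arguments() consumes them:
#
#     HealthCheckCLIRunner._keep_old_argument_style(['--unattended'])
#     # -> ['X', 'X', '--unattended']   (run everything, unattended)
#     HealthCheckCLIRunner._keep_old_argument_style(['arakoon'])
#     # -> ['arakoon', 'X']             (run every arakoon test)
#     HealthCheckCLIRunner._keep_old_argument_style(['arakoon', 'ports-test'])
#     # -> ['arakoon', 'ports-test']    (already fully specified)
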
class ArakoonHealthCheck(object): """ A healthcheck for the arakoon persistent store """ logger = Logger("healthcheck-healthcheck_arakoon") MODULE = 'arakoon' @classmethod def _get_arakoon_clusters(cls, result_handler): """ Retrieves all Arakoon clusters registered in this OVSCluster :param result_handler: Logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :return: Dict with the Arakoon cluster types as key and list with dicts which contain cluster names and pyrakoon clients :rtype: dict(str, list[dict]) """ result_handler.info('Fetching available arakoon clusters.', add_to_result=False) arakoon_clusters = {} for cluster_name in list( Configuration.list('/ovs/arakoon')) + ['cacc']: # Determine Arakoon type is_cacc = cluster_name == 'cacc' arakoon_config = ArakoonClusterConfig(cluster_id=cluster_name, load_config=not is_cacc) if is_cacc is True: with open(Configuration.CACC_LOCATION) as config_file: contents = config_file.read() arakoon_config.read_config(contents=contents) try: arakoon_client = ArakoonInstaller.build_client(arakoon_config) except (ArakoonNoMaster, ArakoonNoMasterResult) as ex: result_handler.failure( 'Unable to find a master for Arakoon cluster {0}. (Message: {1})' .format(cluster_name, str(ex)), code=ErrorCodes.master_none) except Exception as ex: msg = 'Unable to connect to Arakoon cluster {0}. (Message: {1})'.format( cluster_name, str(ex)) result_handler.exception(msg, code=ErrorCodes.unhandled_exception) cls.logger.exception(msg) continue metadata = json.loads( arakoon_client.get(ArakoonInstaller.METADATA_KEY)) cluster_type = metadata['cluster_type'] if cluster_type not in arakoon_clusters: arakoon_clusters[cluster_type] = [] arakoon_clusters[cluster_type].append({ 'cluster_name': cluster_name, 'client': arakoon_client, 'config': arakoon_config }) return arakoon_clusters @classmethod @cluster_check @expose_to_cli( MODULE, 'nodes-test', HealthCheckCLI.ADDON_TYPE, help= 'Verify if nodes are missing and if nodes are catching up to the master', short_help='Test if there are nodes missing/catching up') @expose_to_cli.option( '--max-transactions-behind', '-m', type=int, default=10, help= 'The number of transactions that a slave can be behind a master before logging a failure' ) def check_node_status(cls, result_handler, max_transactions_behind=10): """ Checks the status of every node within the Arakoon cluster This check will report what nodes are currently missing and what nodes are catching up to the master :param result_handler: Logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :param max_transactions_behind: The number of transactions that a slave can be behind a master before logging a failure :type max_transactions_behind: int :return: None :rtype: NoneType """ result_handler.info('Starting Arakoon nodes test.', add_to_result=False) arakoon_clusters = cls._get_arakoon_clusters(result_handler) for cluster_type, clusters in arakoon_clusters.iteritems(): result_handler.info( 'Fetching the status of {0} Arakoons'.format(cluster_type), add_to_result=False) for cluster in clusters: arakoon_client = cluster['client'] cluster_name = cluster['cluster_name'] arakoon_config = cluster['config'] # Map the node ids to the object for easier lookups node_info = dict( (node.name, node) for node in arakoon_config.nodes) identifier = 'Arakoon cluster {0}'.format(cluster_name) try: statistics = arakoon_client._client.statistics() node_is = statistics['node_is'] # Look for any missing nodes within the cluster missing_ids = list( 
set(node_info.keys()) - set(node_is.keys())) if len(missing_ids) > 0: for missing_id in missing_ids: node_config = node_info[missing_id] result_handler.failure( '{0} is missing node: {1}'.format( identifier, '{0} ({1}:{2})'.format( node_config.name, node_config.ip, node_config.client_port)), code=ErrorCodes.node_missing) highest_id = max(node_is.iteritems(), key=operator.itemgetter(1))[0] for node_id, transactions in node_is.iteritems(): if node_id == highest_id: continue transactions_behind = node_is[highest_id] - transactions node_config = node_info[node_id] log = 'Node {0} ({1}:{2}) for {3} {{0}} ({4}/{5})'.format( node_config.name, node_config.ip, node_config.client_port, identifier, transactions_behind, max_transactions_behind) if transactions == 0: result_handler.warning( log.format('is catching up'), code=ErrorCodes.slave_catch_up) elif transactions_behind > max_transactions_behind: result_handler.failure( log.format('is behind the master'), code=ErrorCodes.master_behind) else: result_handler.success( log.format('is up to date'), code=ErrorCodes.node_up_to_date) except (ArakoonNoMaster, ArakoonNoMasterResult) as ex: result_handler.failure( '{0} cannot find a master. (Message: {1})'.format( identifier, str(ex)), code=ErrorCodes.master_none) except Exception as ex: cls.logger.exception( 'Unhandled exception during the nodes check') result_handler.exception( 'Testing {0} threw an unhandled exception. (Message: {1})' .format(identifier, str(ex)), code=ErrorCodes.unhandled_exception) @classmethod @cluster_check @expose_to_cli( MODULE, 'ports-test', HealthCheckCLI.ADDON_TYPE, help='Verifies that the Arakoon clusters still respond to connections', short_help='Test if Arakoons accepts connections') def check_arakoon_ports(cls, result_handler): """ Verifies that the Arakoon clusters still respond to connections :param result_handler: logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :return: None :rtype: NoneType """ arakoon_clusters = cls._get_arakoon_clusters(result_handler) result_handler.info('Starting Arakoon ports test.', add_to_result=False) result_handler.info( 'Retrieving all collapsing statistics. This might take a while', add_to_result=False) start = time.time() arakoon_stats = cls._get_port_connections(result_handler, arakoon_clusters) result_handler.info( 'Retrieving all collapsing statistics succeeded (duration: {0})'. 
format(time.time() - start), add_to_result=False) for cluster_type, clusters in arakoon_stats.iteritems(): result_handler.info( 'Testing the collapse of {0} Arakoons'.format(cluster_type), add_to_result=False) for cluster in clusters: cluster_name = cluster['cluster_name'] connection_result = cluster['connection_result'] connection_result = OrderedDict( sorted(connection_result.items(), key=lambda item: ExtensionsToolbox.advanced_sort( item[0].ip, separator='.'))) for node, stats in connection_result.iteritems(): identifier_log = 'Arakoon cluster {0} on node {1}'.format( cluster_name, node.ip) if len(stats['errors']) > 0: # Determine where issues were found for step, exception in stats['errors']: if step == 'test_connection': try: # Raise the thrown exception raise exception except Exception: message = 'Connection to {0} could not be established due to an unhandled exception.'.format( identifier_log) cls.logger.exception(message) result_handler.exception( message, code=ErrorCodes.unhandled_exception) continue if stats['result'] is True: result_handler.success( 'Connection established to {0}'.format( identifier_log), code=ErrorCodes.arakoon_connection_ok) else: result_handler.failure( 'Connection could not be established to {0}'. format(identifier_log), code=ErrorCodes.arakoon_connection_failure) @classmethod def _get_port_connections(cls, result_handler, arakoon_clusters, batch_size=10): """ Retrieve tlog/tlx stat information for a Arakoon cluster concurrently Note: this will mutate the given arakoon_clusters dict :param result_handler: logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :param arakoon_clusters: Information about all arakoon clusters, sorted by type and given config :type arakoon_clusters: dict :param batch_size: Amount of workers to collect the Arakoon information. Every worker will initiate a connection :return: Dict with tlog/tlx contents for every node config Example return: {CFG: {ovs.extensions.db.arakooninstaller.ArakoonClusterConfig object: {ovs_extensions.db.arakoon.arakooninstaller.ArakoonNodeConfig object: {'result': True, 'errors': []}, ovs_extensions.db.arakoon.arakooninstaller.ArakoonNodeConfig object: {'result': False, 'errors': []}}} :rtype: dict """ queue = Queue.Queue() # Prep work for cluster_type, clusters in arakoon_clusters.iteritems(): for cluster in clusters: cluster_name = cluster['cluster_name'] arakoon_config = cluster['config'] cluster['connection_result'] = {} for node_config in arakoon_config.nodes: result = {'errors': [], 'result': False} cluster['connection_result'][node_config] = result queue.put((cluster_name, node_config, result)) for _ in xrange(batch_size): thread = Thread(target=cls._connection_worker, args=(queue, result_handler)) thread.setDaemon( True ) # Setting threads as "daemon" allows main program to exit eventually even if these don't finish correctly. 
thread.start() # Wait for all results queue.join() return arakoon_clusters @staticmethod def _connection_worker(queue, result_handler): """ Worker method to retrieve file descriptors :param queue: Queue to use :param result_handler: Logging object :return: None :rtype: NoneType """ while not queue.empty(): cluster_name, _node_config, _results = queue.get() errors = _results['errors'] identifier = 'Arakoon cluster {0} on node {1}'.format( cluster_name, _node_config.ip) result_handler.info( 'Testing the connection to {0}'.format(identifier), add_to_result=False) try: _results['result'] = NetworkHelper.check_port_connection( _node_config.client_port, _node_config.ip) except Exception as ex: errors.append(('test_connection', ex)) result_handler.warning( 'Could not test the connection to {0} ({1})'.format( identifier, str(ex)), add_to_result=False) finally: queue.task_done() @classmethod @cluster_check @expose_to_cli(MODULE, 'collapse-test', HealthCheckCLI.ADDON_TYPE, help='Verifies collapsing has occurred for all Arakoons', short_help='Test if Arakoon collapsing is not failing') @expose_to_cli.option('--max-collapse-age', '-a', type=int, default=3, help='Maximum age in days for TLX') @expose_to_cli.option('--min-tlx-amount', '-t', type=int, default=10, help='Minimum amount of TLX files before testing') def check_collapse(cls, result_handler, max_collapse_age=3, min_tlx_amount=10): """ Verifies collapsing has occurred for all Arakoons :param result_handler: logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :param max_collapse_age: tlx files may not be longer than x days :type max_collapse_age: int :param min_tlx_amount: Minimum amount of tlxes before making collapsing mandatory (defaults to 10) :type min_tlx_amount: int :return: None :rtype: NoneType """ arakoon_clusters = cls._get_arakoon_clusters(result_handler) result_handler.info('Starting Arakoon collapse test', add_to_result=False) max_age_seconds = timedelta(days=max_collapse_age).total_seconds() result_handler.info( 'Retrieving all collapsing statistics. This might take a while', add_to_result=False) start = time.time() arakoon_stats = cls._retrieve_stats(result_handler, arakoon_clusters) result_handler.info( 'Retrieving all collapsing statistics succeeded (duration: {0})'. format(time.time() - start), add_to_result=False) for cluster_type, clusters in arakoon_stats.iteritems(): result_handler.info( 'Testing the collapse of {0} Arakoons'.format(cluster_type), add_to_result=False) for cluster in clusters: cluster_name = cluster['cluster_name'] collapse_result = cluster['collapse_result'] collapse_result = OrderedDict( sorted(collapse_result.items(), key=lambda item: ExtensionsToolbox.advanced_sort( item[0].ip, separator='.'))) for node, stats in collapse_result.iteritems(): identifier_log = 'Arakoon cluster {0} on node {1}'.format( cluster_name, node.ip) if len(stats['errors']) > 0: # Determine where issues were found for step, exception in stats['errors']: if step == 'build_client': try: # Raise the thrown exception raise exception except TimeOutException: result_handler.warning( 'Connection to {0} has timed out'. format(identifier_log), code=ErrorCodes.ssh_connection_time) except (socket.error, UnableToConnectException): result_handler.failure( 'Connection to {0} could not be established' .format(identifier_log), code=ErrorCodes.ssh_connection_fail) except NotAuthenticatedException: result_handler.skip( 'Connection to {0} could not be authenticated. This node has no access to the Arakoon node.' 
.format(identifier_log), code=ErrorCodes. ssh_connection_authentication) except Exception: message = 'Connection to {0} could not be established due to an unhandled exception.'.format( identifier_log) cls.logger.exception(message) result_handler.exception( message, code=ErrorCodes.unhandled_exception) elif step == 'stat_dir': try: raise exception except Exception: message = 'Unable to list the contents of the tlog directory ({0}) for {1}'.format( node.tlog_dir, identifier_log) cls.logger.exception(message) result_handler.exception( message, code=ErrorCodes.unhandled_exception) continue tlx_files = stats['result']['tlx'] tlog_files = stats['result']['tlog'] headdb_files = stats['result']['headDB'] avail_size = stats['result']['avail_size'] if any(item is None for item in [tlx_files, tlog_files, avail_size]): # Exception occurred but no errors were logged result_handler.exception( 'Either the tlx or tlog files or available size could be found in/of the tlog directory ({0}) for {1}' .format(node.tlog_dir, identifier_log), code=ErrorCodes.tlx_tlog_not_found) continue if len(headdb_files) > 0: headdb_size = sum([int(i[2]) for i in headdb_files]) collapse_size_msg = 'Spare space for local collapse is' if avail_size >= headdb_size * 4: result_handler.success( '{0} sufficient (n > 4x head.db size)'.format( collapse_size_msg)) elif avail_size >= headdb_size * 3: result_handler.warning( '{0} running short (n > 3x head.db size)'. format(collapse_size_msg)) elif avail_size >= headdb_size * 2: result_handler.failure( '{0} just enough (n > 2x head.db size'.format( collapse_size_msg)) else: result_handler.failure( '{0} insufficient (n <2 x head.db size'.format( collapse_size_msg)) if len(tlog_files) == 0: # A tlog should always be present result_handler.failure( '{0} has no open tlog'.format(identifier_log), code=ErrorCodes.tlog_not_found) continue if len(tlx_files) < min_tlx_amount: result_handler.skip( '{0} only has {1} tlx, not worth collapsing (required: {2})' .format(identifier_log, len(tlx_files), min_tlx_amount)) continue # Compare youngest tlog and oldest tlx timestamp seconds_difference = int(tlog_files[-1][0]) - int( tlx_files[0][0]) if max_age_seconds > seconds_difference: result_handler.success( '{0} should not be collapsed. The oldest tlx is at least {1} days younger than the youngest tlog (actual age: {2})' .format( identifier_log, max_collapse_age, str(timedelta(seconds=seconds_difference))), code=ErrorCodes.collapse_ok) else: result_handler.failure( '{0} should be collapsed. The oldest tlx is currently {1} old' .format( identifier_log, str(timedelta(seconds=seconds_difference))), code=ErrorCodes.collapse_not_ok) @classmethod def _retrieve_stats(cls, result_handler, arakoon_clusters, batch_size=10): """ Retrieve tlog/tlx stat information for a Arakoon cluster concurrently Note: this will mutate the given arakoon_clusters dict :param result_handler: logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :param arakoon_clusters: Information about all arakoon clusters, sorted by type and given config :type arakoon_clusters: dict :param batch_size: Amount of workers to collect the Arakoon information. 
Every worker means a connection towards a different node :return: Dict with tlog/tlx contents for every node config Example return: {CFG: {ovs.extensions.db.arakooninstaller.ArakoonClusterConfig object: {ovs_extensions.db.arakoon.arakooninstaller.ArakoonNodeConfig object: {'result': {'tlx': [['1513174398', '/opt/OpenvStorage/db/arakoon/config/tlogs/3393.tlx']], 'tlog': [['1513178427', '/opt/OpenvStorage/db/arakoon/config/tlogs/3394.tlog']]}, 'errors': []}, ovs_extensions.db.arakoon.arakooninstaller.ArakoonNodeConfig object: {'result': {'tlx': [['1513166090', '/opt/OpenvStorage/db/arakoon/config/tlogs/3392.tlx'], ['1513174418', '/opt/OpenvStorage/db/arakoon/config/tlogs/3393.tlx']], 'tlog': [['1513178427', '/opt/OpenvStorage/db/arakoon/config/tlogs/3394.tlog']]}, 'errors': []}, <ovs_extensions.db.arakoon.arakooninstaller.ArakoonNodeConfig object at 0x7fb3a84db090>: {'output': {'tlx': [['1513174358', '/opt/OpenvStorage/db/arakoon/config/tlogs/3393.tlx']], 'tlog': [['1513178427', '/opt/OpenvStorage/db/arakoon/config/tlogs/3394.tlog']]}, 'errors': []}}} :rtype: dict """ queue = Queue.Queue() clients = {} # Prep work for cluster_type, clusters in arakoon_clusters.iteritems(): for cluster in clusters: cluster_name = cluster['cluster_name'] arakoon_config = cluster['config'] cluster['collapse_result'] = {} for node_config in arakoon_config.nodes: result = { 'errors': [], 'result': { 'tlx': [], 'tlog': [], 'headDB': [], 'avail_size': None } } cluster['collapse_result'][node_config] = result # Build SSHClients outside the threads to avoid GIL try: client = clients.get(node_config.ip) if client is None: client = SSHClient(node_config.ip, timeout=5) clients[node_config.ip] = client except Exception as ex: result['errors'].append(('build_client', ex)) continue queue.put((cluster_name, node_config, result)) # Limit to one session for every node. # Every process will fork from this one, creating a new session instead of using the already existing channel # There might be an issue issue if a ssh session would take too long causing all workers to connect to that one node # and therefore hitting the MaxSessions again (theory) for _ in xrange(min(len(clients.keys()), batch_size)): thread = Thread(target=cls._collapse_worker, args=(queue, clients, result_handler)) thread.setDaemon( True ) # Setting threads as "daemon" allows main program to exit eventually even if these don't finish correctly. 
thread.start() # Wait for all results queue.join() return arakoon_clusters @staticmethod def _collapse_worker(queue, clients, result_handler): """ Worker method to retrieve file descriptors :param queue: Queue to use :param clients: SSHClients to choose from :param result_handler: Logging object :return: None :rtype: NoneType """ while not queue.empty(): cluster_name, _node_config, _results = queue.get() errors = _results['errors'] output = _results['result'] identifier = 'Arakoon cluster {0} on node {1}'.format( cluster_name, _node_config.ip) result_handler.info( 'Retrieving collapse information for {0}'.format(identifier), add_to_result=False) try: _client = clients[_node_config.ip] tlog_dir = _node_config.tlog_dir path = os.path.join(tlog_dir, '*') try: # List the contents of the tlog directory and sort by oldest modification date # Example output: (timestamp, name, size (bits) # 01111 file.tlog 101 # 01112 file2.tlog 102 timestamp_files = _client.run( 'stat -c "%Y %n %s" {0}'.format(path), allow_insecure=True) output['avail_size'] = _client.run( "df {0} | tail -1 | awk '{{print $4}}'".format(path), allow_insecure=True) except Exception as _ex: errors.append(('stat_dir', _ex)) raise # Sort and separate the timestamp item files for split_entry in sorted( (timestamp_file.split() for timestamp_file in timestamp_files.splitlines()), key=lambda split: int(split[0])): file_name = split_entry[1] if file_name.endswith('tlx'): output['tlx'].append(split_entry) elif file_name.endswith('tlog'): output['tlog'].append(split_entry) elif file_name.rsplit('/')[-1].startswith('head.db'): output['headDB'].append(split_entry) except Exception as _ex: result_handler.warning( 'Could not retrieve the collapse information for {0} ({1})' .format(identifier, str(_ex)), add_to_result=False) finally: queue.task_done() @classmethod @cluster_check @expose_to_cli( MODULE, 'integrity-test', HealthCheckCLI.ADDON_TYPE, help= 'Verifies that all Arakoon clusters are still responding to client calls', short_help='Test if Arakoon clusters are still responding') def verify_integrity(cls, result_handler): """ Verifies that all Arakoon clusters are still responding to client calls :param result_handler: logging object :type result_handler: ovs.extensions.healthcheck.result.HCResults :return: None :rtype: NoneType """ arakoon_cluster = cls._get_arakoon_clusters(result_handler) result_handler.info('Starting Arakoon integrity test', add_to_result=False) for cluster_type, clusters in arakoon_cluster.iteritems(): result_handler.info( 'Testing the integry of {0} Arakoons'.format(cluster_type), add_to_result=False) for cluster in clusters: arakoon_client = cluster['client'] cluster_name = cluster['cluster_name'] try: arakoon_client.nop() result_handler.success( 'Arakoon {0} responded'.format(cluster_name), code=ErrorCodes.arakoon_responded) except (ArakoonNoMaster, ArakoonNoMasterResult) as ex: result_handler.failure( 'Arakoon {0} cannot find a master. (Message: {1})'. format(cluster_name, str(ex)), code=ErrorCodes.master_none) except Exception as ex: cls.logger.exception( 'Unhandled exception during the integrity check') result_handler.exception( 'Arakoon {0} threw an unhandled exception. 

    @classmethod
    @cluster_check
    @expose_to_cli(MODULE, 'file-descriptors-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify the number of File Descriptors on every Arakoon does not exceed the limit',
                   short_help='Test if #FD does not exceed the limit')
    @expose_to_cli.option('--fd-limit', '-l', type=int, default=30,
                          help='Threshold for the number of TCP connections for which to start logging warnings')
    def check_arakoon_fd(cls, result_handler, fd_limit=30, passed_connections=None):
        """
        Checks all currently open TCP file descriptors for all Arakoon clusters in the OVS cluster
        Will raise warnings when these reach a certain threshold
        :param result_handler: Logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param fd_limit: Threshold for the number of TCP connections for which to start logging warnings
        :type fd_limit: int
        :param passed_connections: TCP connection states that count towards the limit (defaults to ESTABLISHED and TIME_WAIT)
        :type passed_connections: list
        :return: None
        :rtype: NoneType
        """
        if passed_connections is None:
            passed_connections = ['ESTABLISHED', 'TIME_WAIT']
        warning_threshold = fd_limit * 80 / 100  # Integer division: with the default limit of 30 this is 24
        error_threshold = fd_limit * 95 / 100  # Integer division: with the default limit of 30 this is 28
        result_handler.info('Starting Arakoon file descriptor test', add_to_result=False)
        arakoon_clusters = cls._get_arakoon_clusters(result_handler)
        start = time.time()
        arakoon_fd_results = cls._get_filedescriptors(result_handler, arakoon_clusters)
        result_handler.info('Retrieving all file descriptor information succeeded (duration: {0})'.format(time.time() - start),
                            add_to_result=False)
        for cluster_type, clusters in arakoon_fd_results.iteritems():
            result_handler.info('Checking the file descriptors of {0} Arakoons'.format(cluster_type), add_to_result=False)
            for cluster in clusters:
                cluster_name = cluster['cluster_name']
                fd_result = cluster['fd_result']
                # Sort the node configs by IP for deterministic output
                fd_result = OrderedDict(sorted(fd_result.items(),
                                               key=lambda item: ExtensionsToolbox.advanced_sort(item[0].ip, separator='.')))
                for node, stats in fd_result.iteritems():
                    identifier_log = 'Arakoon cluster {0} on node {1}'.format(cluster_name, node.ip)
                    if len(stats['errors']) > 0:
                        # Determine where issues were found
                        for step, exception in stats['errors']:
                            if step == 'build_client':
                                try:
                                    # Re-raise the exception that was thrown while building the client
                                    raise exception
                                except TimeOutException:
                                    result_handler.warning('Connection to {0} has timed out'.format(identifier_log),
                                                           code=ErrorCodes.ssh_connection_time)
                                except (socket.error, UnableToConnectException):
                                    result_handler.failure('Connection to {0} could not be established'.format(identifier_log),
                                                           code=ErrorCodes.ssh_connection_fail)
                                except NotAuthenticatedException:
                                    result_handler.skip('Connection to {0} could not be authenticated. This node has no access to the Arakoon node.'.format(identifier_log),
                                                        code=ErrorCodes.ssh_connection_authentication)
                                except Exception:
                                    message = 'Connection to {0} could not be established due to an unhandled exception.'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, code=ErrorCodes.unhandled_exception)
                            elif step == 'lsof':
                                try:
                                    raise exception
                                except Exception:
                                    message = 'Unable to list the file descriptors for {0}'.format(identifier_log)
                                    cls.logger.exception(message)
                                    result_handler.exception(message, ErrorCodes.unhandled_exception)
                        continue
                    fds = stats['result']['fds']
                    # Keep only the connections in the requested states (last column of the lsof output, wrapped in parentheses)
                    filtered_fds = [i for i in fds if i.split()[-1].strip('(').strip(')') in passed_connections]
                    if len(filtered_fds) >= warning_threshold:
                        if len(filtered_fds) >= error_threshold:
                            result_handler.warning('Number of TCP connections exceeded the 95% warning threshold for {0}, ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                                   code=ErrorCodes.arakoon_fd_95)
                        else:
                            result_handler.warning('Number of TCP connections exceeded the 80% warning threshold for {0}, ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                                   code=ErrorCodes.arakoon_fd_80)
                    else:
                        result_handler.success('Number of TCP connections for {0} is healthy ({1}/{2})'.format(identifier_log, len(filtered_fds), fd_limit),
                                               code=ErrorCodes.arakoon_fd_ok)

    @classmethod
    def _get_filedescriptors(cls, result_handler, arakoon_clusters, batch_size=10):
        """
        Retrieve the open file descriptors for all Arakoon clusters concurrently
        Note: this will mutate the given arakoon_clusters dict
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :param arakoon_clusters: Information about all Arakoon clusters, sorted by type and given config
        :type arakoon_clusters: dict
        :param batch_size: Amount of workers to collect the Arakoon information.
        Every worker means a connection towards a different node
        :type batch_size: int
        :return: Dict with file descriptor contents for every node config
        :rtype: dict
        """
        queue = Queue.Queue()
        clients = {}
        # Prep work
        for cluster_type, clusters in arakoon_clusters.iteritems():
            for cluster in clusters:
                cluster_name = cluster['cluster_name']
                arakoon_config = cluster['config']
                cluster['fd_result'] = {}
                for node_config in arakoon_config.nodes:
                    result = {'errors': [], 'result': {'fds': []}}
                    # Store the result up front so errors raised while building the client are reported as well
                    cluster['fd_result'][node_config] = result
                    # Build SSHClients outside the threads so connection setup happens only once per node
                    try:
                        client = clients.get(node_config.ip)
                        if client is None:
                            client = SSHClient(node_config.ip, timeout=5)
                            clients[node_config.ip] = client
                    except Exception as ex:
                        result['errors'].append(('build_client', ex))
                        continue
                    queue.put((cluster_name, node_config, result))
        service_manager = ServiceFactory.get_manager()
        # Limit to one session for every node.
        # Every process will fork from this one, creating a new session instead of using the already existing channel.
        # There might be an issue if an SSH session takes too long, causing all workers to connect to that one node
        # and therefore hitting MaxSessions again (theory).
        for _ in xrange(min(len(clients.keys()), batch_size)):
            thread = Thread(target=cls._fd_worker, args=(queue, clients, result_handler, service_manager))
            thread.setDaemon(True)  # Setting threads as "daemon" allows the main program to exit eventually even if these don't finish correctly
            thread.start()
        # Wait for all results
        queue.join()
        return arakoon_clusters

    @staticmethod
    def _fd_worker(queue, clients, result_handler, service_manager):
        """
        Worker method to retrieve file descriptors
        :param queue: Queue to use
        :param clients: SSHClients to choose from
        :param result_handler: Logging object
        :param service_manager: Service manager instance
        :return: None
        :rtype: NoneType
        """
        while not queue.empty():
            cluster_name, _node_config, _results = queue.get(False)
            errors = _results['errors']
            output = _results['result']
            identifier = 'Arakoon cluster {0} on node {1}'.format(cluster_name, _node_config.ip)
            result_handler.info('Retrieving file descriptor information for {0}'.format(identifier), add_to_result=False)
            try:
                client = clients[_node_config.ip]
                try:
                    # Handle the config Arakoon: the 'cacc' cluster its service is registered under the name 'config'
                    cluster_name = cluster_name if cluster_name != 'cacc' else 'config'
                    service_name = ArakoonInstaller.get_service_name_for_cluster(cluster_name)
                    pid = service_manager.get_service_pid(service_name, client)
                    # lsof -i -a -p <pid>: list only the internet sockets opened by this process, skipping the header line
                    file_descriptors = client.run(['lsof', '-i', '-a', '-p', str(pid)]).splitlines()[1:]
                except Exception as _ex:
                    errors.append(('lsof', _ex))
                    raise
                output['fds'] = file_descriptors
            except Exception as _ex:
                result_handler.warning('Could not retrieve the file descriptor information for {0} ({1})'.format(identifier, str(_ex)),
                                       add_to_result=False)
            finally:
                queue.task_done()
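
# The pair of methods above (_get_filedescriptors/_fd_worker, and likewise the
# collapse variant) follows one pattern: queue all work up front, build one SSH
# client per node outside the threads, then drain the queue with a bounded pool
# of daemon threads. A minimal, self-contained sketch of that pattern, standard
# library only; 'hosts' and 'collect' are hypothetical placeholders for
# illustration and are not part of this module:
def _bounded_worker_pool_example(hosts, collect, batch_size=10):
    import Queue
    from threading import Thread

    queue = Queue.Queue()
    results = dict((host, {'errors': [], 'result': None}) for host in hosts)
    for host in hosts:
        queue.put(host)

    def worker():
        while not queue.empty():
            try:
                # Non-blocking get: the queue may drain between empty() and get()
                host = queue.get(False)
            except Queue.Empty:
                return
            try:
                results[host]['result'] = collect(host)
            except Exception as ex:
                results[host]['errors'].append(ex)
            finally:
                queue.task_done()  # Every fetched item must be marked done for join() to return

    # Never start more threads than there is work (or than the batch size allows)
    for _ in xrange(min(len(hosts), batch_size)):
        thread = Thread(target=worker)
        thread.setDaemon(True)  # Daemon threads cannot block interpreter exit
        thread.start()
    queue.join()  # Block until every queued item was marked done
    return results

# Example: _bounded_worker_pool_example(['10.0.0.1', '10.0.0.2'], some_callable)
# returns {'10.0.0.1': {'errors': [...], 'result': ...}, ...}, mirroring the
# per-node result dicts built by the methods above.
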
class CLI(click.Group):
    """
    Click CLI which dynamically loads all possible commands
    Implementations require an entry point
    An entry point is defined as:
    @click.group(cls=CLI)
    def entry_point():
        pass
    if __name__ == '__main__':
        entry_point()
    """
    ADDON_TYPE = 'ovs'  # Type of addon the CLI is
    CACHE_KEY = 'ovs_discover_method'
    CACHE_EXPIRE_HOURS = 2  # Number of hours before the cache expires
    GROUP_MODULE_CLASS = click.Group
    CMD_FOLDER = os.path.join(os.path.dirname(__file__))  # Folder to query for commands

    logger = Logger("ovs_clirunner")
    _volatile_client = VolatileFactory.get_client()
    _discovery_cache = {}

    def __init__(self, *args, **kwargs):
        # type: (*any, **any) -> None
        super(CLI, self).__init__(*args, **kwargs)

    def list_commands(self, ctx):
        # type: (click.Context) -> list[str]
        """
        Lists all possible commands found within the directory of this file
        All modules are retrieved
        :param ctx: Passed context
        :return: List of command names (one per discovered module)
        """
        _ = ctx
        sub_commands = self._discover_methods().keys()  # Returns all underlying modules
        sub_commands.sort()
        return sub_commands

    def get_command(self, ctx, name):
        # type: (click.Context, str) -> callable
        """
        Retrieves a command to execute
        :param ctx: Passed context
        :param name: Name of the command
        :return: Function pointer to the command or None when no import could happen
        :rtype: callable
        """
        cmd = self.commands.get(name)
        if cmd:
            return cmd
        # More extensive - build the command and register it
        discovery_data = self._discover_methods()
        if name in discovery_data.keys():
            # The currently passed name is a module. Wrap it up in a group and add all commands under it dynamically
            module_commands = {}
            for function_name, function_data in discovery_data[name].iteritems():
                # Register the decorated function as callback to click
                # Prefix the module name to avoid name collisions with other modules; collisions might lead to unexpected results
                mod = imp.load_source('ovs_cli_{0}'.format(function_data['module_name']), function_data['location'])
                cl = getattr(mod, function_data['class'])()
                module_commands[function_name] = click.Command(function_name, callback=getattr(cl, function_data['function']))
            ret = self.GROUP_MODULE_CLASS(name, module_commands)
            self.add_command(ret)
            return ret

    @classmethod
    def _discover_methods(cls):
        # type: () -> dict
        """
        Discovers all methods with the expose_to_cli decorator
        :return: dict that contains the required info based on module_name and method_name
        :rtype: dict
        """
        version_id = 1
        start_path = cls.CMD_FOLDER
        addon_type = cls.ADDON_TYPE

        def discover():
            """
            Build a dict listing all discovered methods with @expose_to_cli
            :return: Dict with all discovered items
            :rtype: dict
            """
            # Build cache
            found_items = {'expires': time.time() + cls.CACHE_EXPIRE_HOURS * 60 ** 2}
            path = start_path
            for root, dirnames, filenames in os.walk(path):
                for filename in filenames:
                    if not (filename.endswith('.py') and filename != '__init__.py'):
                        continue
                    file_path = os.path.join(root, filename)
                    module_name = 'ovs_cli_{0}'.format(filename.replace('.py', ''))
                    # Import the file, making the module name relative to the start path to avoid name collisions.
                    # Without it, the module contents would be merged (e.g. alba.py and testing/alba.py would be merged, overriding the paths).
                    # imp.load_source is different from importing: using the relative-joined name is therefore safe
                    try:
                        mod = imp.load_source(module_name, file_path)
                    except ImportError:
                        cls.logger.exception('Unable to import module at {0}'.format(file_path))
                        continue
                    for member_name, member_value in inspect.getmembers(mod):
                        if not (inspect.isclass(member_value) and member_value.__module__ == module_name and 'object' in [base.__name__ for base in member_value.__bases__]):
                            continue
                        for submember_name, submember_value in inspect.getmembers(member_value):
                            if not hasattr(submember_value, expose_to_cli.attribute):
                                continue
                            exposed_data = getattr(submember_value, expose_to_cli.attribute)
                            method_module_name = exposed_data['module_name']
                            method_name = exposed_data['method_name']
                            method_addon_type = exposed_data['addon_type'] if 'addon_type' in exposed_data else None
                            if method_module_name not in found_items:
                                found_items[method_module_name] = {}
                            # Only register the method when the addon type matches
                            if method_addon_type == addon_type:
                                function_metadata = {'function': submember_value.__name__,
                                                     'class': member_value.__name__,
                                                     'location': file_path,
                                                     'version': version_id}
                                function_metadata.update(exposed_data)  # Add all exposed data for further re-use
                                found_items[method_module_name][method_name] = function_metadata
            return found_items

        def get_and_cache():
            found_items = cls._volatile_client.get(cls.CACHE_KEY)
            if found_items:
                cls._discovery_cache.update(found_items)
            return found_items

        try:
            exposed_methods = copy.deepcopy(cls._discovery_cache) or get_and_cache()
            if exposed_methods and exposed_methods['expires'] > time.time():
                # Able to use the cache, it has not expired yet
                del exposed_methods['expires']
                return exposed_methods
        except Exception:
            cls.logger.exception('Unable to retrieve the exposed resources from cache')
        exposed_methods = discover()
        try:
            # Cache a copy: 'expires' is deleted from the returned dict below,
            # and that deletion must not propagate into the cached version
            cls._discovery_cache = copy.deepcopy(exposed_methods)
            cls._volatile_client.set(cls.CACHE_KEY, exposed_methods)
        except Exception:
            cls.logger.exception('Unable to cache the exposed resources')
        del exposed_methods['expires']
        return exposed_methods

    @classmethod
    def clear_cache(cls):
        # type: () -> None
        """
        Clear all cache related to discovering methods
        :return: None
        :rtype: NoneType
        """
        cls._discovery_cache = {}  # Drop the in-process cache as well, not just the volatile one
        cls._volatile_client.delete(cls.CACHE_KEY)
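
# _discover_methods above layers an in-process dict over a volatile store and
# piggybacks the expiry timestamp inside the cached payload itself. A minimal
# sketch of that expiry pattern in isolation, assuming nothing beyond the
# standard library; 'cache' and 'compute' are hypothetical stand-ins for
# _discovery_cache and discover():
def _expiring_cache_example(cache, compute, expire_seconds=2 * 60 ** 2):
    """
    Return the cached data while it has not expired yet; recompute and re-cache otherwise.
    'cache' is a plain dict shared between calls.
    """
    import copy
    import time

    cached = copy.deepcopy(cache)
    if cached and cached.get('expires', 0) > time.time():
        del cached['expires']  # Only the deep copy is mutated, the shared cache stays intact
        return cached
    fresh = compute()
    fresh['expires'] = time.time() + expire_seconds
    cache.clear()
    cache.update(copy.deepcopy(fresh))  # Store a copy so the deletion below cannot leak into the cache
    del fresh['expires']
    return fresh

# Example: _expiring_cache_example(shared_cache, lambda: {'commands': ['dtl-test']})
# computes once, then serves the cached copy for the next two hours.
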
class IPMIHealthCheck(object):
    """
    Healthcheck file to execute multiple IPMI tests
    """
    MODULE = 'ipmi'
    logger = Logger("healthcheck-healthcheck_ipmi")

    @classmethod
    @expose_to_cli(MODULE, 'ipmi-test', HealthCheckCLI.ADDON_TYPE,
                   help='Verify that AlbaNodes can be controlled through IPMI',
                   short_help='Test if AlbaNodes their IPMI info is correct')
    def ipmi_check(cls, result_handler):
        """
        Verify that every AlbaNode with an IPMI configuration can be reached and queried through IPMI
        :param result_handler: logging object
        :type result_handler: ovs.extensions.healthcheck.result.HCResults
        :return: None
        :rtype: NoneType
        """
        for albanode in AlbaNodeList.get_albanodes():
            node_id = albanode.node_id
            ipmi_config_loc = '/ovs/alba/asdnodes/{0}/config/ipmi'.format(node_id)
            if not Configuration.exists(ipmi_config_loc):
                result_handler.skip('No IPMI info found on AlbaNode with ID {0}'.format(node_id))
                continue
            ipmi_config = Configuration.get(ipmi_config_loc)
            ip = ipmi_config.get('ip')
            try:
                controller = IPMIController(ip=ip,
                                            username=ipmi_config.get('username'),
                                            password=ipmi_config.get('password'),
                                            client=SSHClient(System.get_my_storagerouter()))
            except Exception:
                result_handler.failure('IPMI settings are not valid for AlbaNode with ID {0}'.format(node_id))
                continue
            try:
                status = controller.status_node().get(ip)
                if status == IPMIController.IPMI_POWER_ON:
                    result_handler.success('IPMI AlbaNode with ID {0} status is POWER ON'.format(node_id))
                elif status == IPMIController.IPMI_POWER_OFF:
                    result_handler.warning('IPMI AlbaNode with ID {0} status is POWER OFF'.format(node_id))
                else:
                    result_handler.warning('IPMI AlbaNode with ID {0} returned an unrecognized status: {1}'.format(node_id, status))
            except IPMITimeOutException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} timed out: '{1}'".format(node_id, ex))
            except IPMICallException as ex:
                result_handler.failure("IPMI AlbaNode with ID {0} call failed: '{1}'".format(node_id, ex))
            except Exception:
                msg = 'Could not retrieve info through IPMI for AlbaNode with ID {0}'.format(node_id)
                cls.logger.exception(msg)
                result_handler.exception(msg)
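
# ipmi_check above assumes each AlbaNode's IPMI config provides 'ip', 'username'
# and 'password'; that key set is inferred from the IPMIController call above,
# not from a published schema. A small validation sketch under that assumption,
# which could flag unusable configs before attempting a connection:
def _validate_ipmi_config_example(ipmi_config):
    """Return the list of missing or empty IPMI settings; an empty list means the config looks usable."""
    required = ('ip', 'username', 'password')
    return [key for key in required if not ipmi_config.get(key)]

# Example: _validate_ipmi_config_example({'ip': '10.0.0.1', 'username': 'admin', 'password': ''})
# returns ['password'], which ipmi_check would surface as invalid IPMI settings.
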