Example #1
    def remove_arakoon_cluster(cluster_name, master_storagerouter_ip):
        """
        Delete a whole arakoon cluster

        :param cluster_name: name of an existing arakoon cluster
        :type cluster_name: str
        :param master_storagerouter_ip: master IP address of an existing arakoon cluster
        :type master_storagerouter_ip: str
        """
        ArakoonRemover.LOGGER.info(
            "Starting removing arakoon cluster with name `{0}`, master_ip `{1}`"
            .format(cluster_name, master_storagerouter_ip))
        arakoon_installer = ArakoonInstaller(cluster_name)
        arakoon_installer.load()
        arakoon_installer.delete_cluster()
        ArakoonRemover.LOGGER.info(
            "Finished removing arakoon cluster with name `{0}`, master_ip `{1}`"
            .format(cluster_name, master_storagerouter_ip))
Example #2
 def _on_demote(cluster_ip, master_ip, offline_node_ips=None):
     """
     Handles the demote for the StorageDrivers
     :param cluster_ip: IP of the node to demote
     :type cluster_ip: str
     :param master_ip: IP of the master node
     :type master_ip: str
     :param offline_node_ips: IPs of nodes which are offline
     :type offline_node_ips: list
     :return: None
     """
     _ = master_ip
     if offline_node_ips is None:
         offline_node_ips = []
     servicetype = ServiceTypeList.get_by_name(
         ServiceType.SERVICE_TYPES.ARAKOON)
     current_service = None
     remaining_ips = []
     for service in servicetype.services:
         if service.name == 'arakoon-voldrv' and service.is_internal is True:  # Externally managed arakoon cluster services do not have StorageRouters
             if service.storagerouter.ip == cluster_ip:
                 current_service = service
             elif service.storagerouter.ip not in offline_node_ips:
                 remaining_ips.append(service.storagerouter.ip)
     if current_service is not None:
         if len(remaining_ips) == 0:
             raise RuntimeError(
                 'Could not find any remaining arakoon nodes for the voldrv cluster'
             )
         StorageDriverController._logger.debug(
             '* Shrink StorageDriver cluster')
         cluster_name = str(
             Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
         arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
         arakoon_installer.load()
         arakoon_installer.shrink_cluster(removal_ip=cluster_ip,
                                          offline_nodes=offline_node_ips)
         arakoon_installer.restart_cluster_after_shrinking()
         current_service.delete()
         StorageDriverController._configure_arakoon_to_volumedriver(
             cluster_name=cluster_name)
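The load/shrink/restart sequence above recurs in the demote flow further down (see Example #9). A hedged helper capturing that recurring sequence, using only calls shown in these examples:

    def shrink_arakoon_cluster(cluster_name, removal_ip, offline_node_ips=None):
        # Sketch only: wraps the recurring load -> shrink -> restart sequence.
        arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
        arakoon_installer.load()
        arakoon_installer.shrink_cluster(removal_ip=removal_ip,
                                         offline_nodes=offline_node_ips or [])
        arakoon_installer.restart_cluster_after_shrinking()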
Example #3
    def shrink_vpool(cls,
                     storagedriver_guid,
                     offline_storage_router_guids=None):
        """
        Removes a StorageDriver (if its the last StorageDriver for a vPool, the vPool is removed as well)
        :param storagedriver_guid: Guid of the StorageDriver to remove
        :type storagedriver_guid: str
        :param offline_storage_router_guids: Guids of StorageRouters which are offline and will be removed from the cluster.
                                             Whether the vPool will be deleted depends on this.
        :type offline_storage_router_guids: list
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        # TODO: Unit test individual pieces of code
        if offline_storage_router_guids is None:  # Avoid a mutable default argument
            offline_storage_router_guids = []

        # Validations
        storagedriver = StorageDriver(storagedriver_guid)
        storagerouter = storagedriver.storagerouter
        cls._logger.info(
            'StorageDriver {0} - Deleting StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        vp_installer = VPoolInstaller(name=storagedriver.vpool.name)
        vp_installer.validate(storagedriver=storagedriver)

        sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                              storagedriver=storagedriver)

        cls._logger.info(
            'StorageDriver {0} - Checking availability of related StorageRouters'
            .format(storagedriver.guid))
        sr_client_map = SSHClient.get_clients(
            endpoints=[sd.storagerouter for sd in vp_installer.vpool.storagedrivers],
            user_names=['root'])
        sr_installer = StorageRouterInstaller(
            root_client=sr_client_map.get(storagerouter, {}).get('root'),
            storagerouter=storagerouter,
            vp_installer=vp_installer,
            sd_installer=sd_installer)

        offline_srs = sr_client_map.pop('offline')
        if sorted(sr.guid for sr in offline_srs) != sorted(offline_storage_router_guids):
            raise RuntimeError('Not all StorageRouters are reachable')

        if storagerouter not in offline_srs:
            mtpt_pids = sr_installer.root_client.run(
                "lsof -t +D '/mnt/{0}' || true".format(
                    vp_installer.name.replace(r"'", r"'\''")),
                allow_insecure=True).splitlines()
            if len(mtpt_pids) > 0:
                raise RuntimeError(
                    'vPool cannot be deleted. Following processes keep the vPool mount point occupied: {0}'
                    .format(', '.join(mtpt_pids)))

        # Retrieve reachable StorageDrivers
        reachable_storagedrivers = []
        for sd in vp_installer.vpool.storagedrivers:
            if sd.storagerouter not in sr_client_map:
                # StorageRouter is offline
                continue

            sd_key = '/ovs/vpools/{0}/hosts/{1}/config'.format(
                vp_installer.vpool.guid, sd.storagedriver_id)
            if Configuration.exists(sd_key) is True:
                path = Configuration.get_configuration_path(sd_key)
                with remote(sd.storagerouter.ip,
                            [LocalStorageRouterClient]) as rem:
                    try:
                        lsrc = rem.LocalStorageRouterClient(path)
                        lsrc.server_revision()  # 'Cheap' call to verify whether volumedriver is responsive
                        cls._logger.info(
                            'StorageDriver {0} - Responsive StorageDriver {1} on node with IP {2}'
                            .format(storagedriver.guid, sd.name,
                                    sd.storagerouter.ip))
                        reachable_storagedrivers.append(sd)
                    except Exception as exception:
                        if not is_connection_failure(exception):
                            raise

        if len(reachable_storagedrivers) == 0:
            raise RuntimeError(
                'Could not find any responsive node in the cluster')

        # Start removal
        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.SHRINKING)
        else:
            vp_installer.update_status(status=VPool.STATUSES.DELETING)

        # Clean up stale vDisks
        cls._logger.info('StorageDriver {0} - Removing stale vDisks'.format(
            storagedriver.guid))
        VDiskController.remove_stale_vdisks(vpool=vp_installer.vpool)

        # Reconfigure the MDSes
        cls._logger.info('StorageDriver {0} - Reconfiguring MDSes'.format(
            storagedriver.guid))
        for vdisk_guid in storagerouter.vdisks_guids:
            try:
                MDSServiceController.ensure_safety(
                    vdisk_guid=vdisk_guid,
                    excluded_storagerouter_guids=[storagerouter.guid] +
                    offline_storage_router_guids)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - vDisk {1} - Ensuring MDS safety failed'
                    .format(storagedriver.guid, vdisk_guid))

        # Validate that all MDSes on current StorageRouter have been moved away
        # Ensure safety does not always throw an error, that's why we perform this check here instead of in the Exception clause of above code
        vdisks = []
        for mds in vp_installer.mds_services:
            for junction in mds.vdisks:
                vdisk = junction.vdisk
                if vdisk in vdisks:
                    continue
                vdisks.append(vdisk)
                cls._logger.critical(
                    'StorageDriver {0} - vDisk {1} {2} - MDS Services have not been migrated away'
                    .format(storagedriver.guid, vdisk.guid, vdisk.name))
        if len(vdisks) > 0:
            # Put back in RUNNING, so it can be used again. Errors keep on displaying in GUI now anyway
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
            raise RuntimeError(
                'Not all MDS Services have been successfully migrated away')

        # Start with actual removal
        errors_found = False
        if storagerouter not in offline_srs:
            errors_found |= sd_installer.stop_services()

        errors_found |= vp_installer.configure_cluster_registry(
            exclude=[storagedriver], apply_on=reachable_storagedrivers)
        errors_found |= vp_installer.update_node_distance_map()
        errors_found |= vp_installer.remove_mds_services()
        errors_found |= sd_installer.clean_config_management()
        errors_found |= sd_installer.clean_model()

        if storagerouter not in offline_srs:
            errors_found |= sd_installer.clean_directories(
                mountpoints=StorageRouterController.get_mountpoints(
                    client=sr_installer.root_client))

            try:
                DiskController.sync_with_reality(
                    storagerouter_guid=storagerouter.guid)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - Synchronizing disks with reality failed'
                    .format(storagedriver.guid))
                errors_found = True

        if vp_installer.storagedriver_amount > 1:
            # Update the vPool metadata and run DTL checkup
            vp_installer.vpool.metadata['caching_info'].pop(
                sr_installer.storagerouter.guid, None)
            vp_installer.vpool.save()

            try:
                VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                            ensure_single_timeout=600)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - DTL checkup failed for vPool {1} with guid {2}'
                    .format(storagedriver.guid, vp_installer.name,
                            vp_installer.vpool.guid))
        else:
            cls._logger.info(
                'StorageDriver {0} - Removing vPool from model'.format(
                    storagedriver.guid))
            # Clean up model
            try:
                vp_installer.vpool.delete()
            except Exception:
                errors_found = True
                cls._logger.exception(
                    'StorageDriver {0} - Cleaning up vPool from the model failed'
                    .format(storagedriver.guid))
            Configuration.delete('/ovs/vpools/{0}'.format(
                vp_installer.vpool.guid))

        cls._logger.info('StorageDriver {0} - Running MDS checkup'.format(
            storagedriver.guid))
        try:
            MDSServiceController.mds_checkup()
        except Exception:
            cls._logger.exception(
                'StorageDriver {0} - MDS checkup failed'.format(
                    storagedriver.guid))

        # Update vPool status
        if errors_found is True:
            if vp_installer.storagedriver_amount > 1:
                vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise RuntimeError(
                '1 or more errors occurred while trying to remove the StorageDriver. Please check the logs for more information'
            )

        if vp_installer.storagedriver_amount > 1:
            vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info(
            'StorageDriver {0} - Deleted StorageDriver {1}'.format(
                storagedriver.guid, storagedriver.name))

        if len(VPoolList.get_vpools()) == 0:
            cluster_name = ArakoonInstaller.get_cluster_name('voldrv')
            if ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
                    cluster_name=cluster_name)['internal'] is True:
                cls._logger.debug(
                    'StorageDriver {0} - Removing Arakoon cluster {1}'.format(
                        storagedriver.guid, cluster_name))
                try:
                    installer = ArakoonInstaller(cluster_name=cluster_name)
                    installer.load()
                    installer.delete_cluster()
                except Exception:
                    cls._logger.exception(
                        'StorageDriver {0} - Delete voldrv Arakoon cluster failed'
                        .format(storagedriver.guid))
                service_type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                for service in list(service_type.services):
                    if service.name == service_name:
                        service.delete()

        # Remove watcher volumedriver service if last StorageDriver on current StorageRouter
        if len(storagerouter.storagedrivers) == 0 and storagerouter not in offline_srs:  # ensure client is initialized for StorageRouter
            try:
                if cls._service_manager.has_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client):
                    cls._service_manager.stop_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
                    cls._service_manager.remove_service(
                        ServiceFactory.SERVICE_WATCHER_VOLDRV,
                        client=sr_installer.root_client)
            except Exception:
                cls._logger.exception(
                    'StorageDriver {0} - {1} service deletion failed'.format(
                        storagedriver.guid,
                        ServiceFactory.SERVICE_WATCHER_VOLDRV))
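One detail worth calling out in the removal step above: the error flag is accumulated with `|=` rather than `&=`. Starting from False, `&=` could never flip the flag to True, so every reported failure would be silently ignored. A self-contained demonstration:

    flag_and = False
    flag_or = False
    for step_had_errors in [False, True, False]:
        flag_and &= step_had_errors  # stays False forever
        flag_or |= step_had_errors   # latches True on the first failure
    assert flag_and is False and flag_or is True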
Example #4
    def test_arakoon_collapse(self):
        """
        Test the Arakoon collapse functionality
        """
        # Set up the test
        structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1, 2]})
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]
        MockedSSHClient._run_returns[storagerouter_1.ip] = {}
        MockedSSHClient._run_returns[storagerouter_2.ip] = {}

        # Make sure we cover all Arakoon cluster types
        clusters_to_create = {
            ServiceType.ARAKOON_CLUSTER_TYPES.SD: [{
                'name': 'unittest-voldrv',
                'internal': True,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.CFG: [{
                'name': 'unittest-cacc',
                'internal': True,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.FWK: [{
                'name': 'unittest-ovsdb',
                'internal': True,
                'success': False
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.ABM: [{
                'name': 'unittest-cluster-1-abm',
                'internal': True,
                'success': False
            }, {
                'name': 'unittest-random-abm-name',
                'internal': False,
                'success': True
            }],
            ServiceType.ARAKOON_CLUSTER_TYPES.NSM: [{
                'name': 'unittest-cluster-1-nsm_0',
                'internal': True,
                'success': True
            }]
        }
        self.assertEqual(
            first=sorted(clusters_to_create.keys()),
            second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
            msg='An Arakoon cluster type has been removed or added, please update this test accordingly')

        # Create all Arakoon clusters and related services
        failed_clusters = []
        external_clusters = []
        successful_clusters = []
        for cluster_type, cluster_infos in clusters_to_create.iteritems():
            filesystem = cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG
            for cluster_info in cluster_infos:
                internal = cluster_info['internal']
                cluster_name = cluster_info['name']

                base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
                arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
                arakoon_installer.create_cluster(cluster_type=cluster_type,
                                                 ip=storagerouter_1.ip,
                                                 base_dir=base_dir,
                                                 internal=internal)
                arakoon_installer.start_cluster()
                arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip,
                                                 base_dir=base_dir)

                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                if cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.ALBA_MGR)
                elif cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.NS_MGR)
                else:
                    service_type = ServiceTypeList.get_by_name(
                        ServiceType.SERVICE_TYPES.ARAKOON)

                if internal is True:
                    DalHelper.create_service(
                        service_name=service_name,
                        service_type=service_type,
                        storagerouter=storagerouter_1,
                        ports=arakoon_installer.ports[storagerouter_1.ip])
                    DalHelper.create_service(
                        service_name=service_name,
                        service_type=service_type,
                        storagerouter=storagerouter_2,
                        ports=arakoon_installer.ports[storagerouter_2.ip])
                else:
                    DalHelper.create_service(service_name=service_name,
                                             service_type=service_type)

                    external_clusters.append(cluster_name)
                    continue

                if cluster_info['success'] is True:
                    if filesystem is True:
                        config_path = ArakoonClusterConfig.CONFIG_FILE.format(
                            cluster_name)
                    else:
                        config_path = Configuration.get_configuration_path(
                            ArakoonClusterConfig.CONFIG_KEY.format(
                                cluster_name))
                    MockedSSHClient._run_returns[storagerouter_1.ip][
                        'arakoon --collapse-local 1 2 -config {0}'.format(
                            config_path)] = None
                    MockedSSHClient._run_returns[storagerouter_2.ip][
                        'arakoon --collapse-local 2 2 -config {0}'.format(
                            config_path)] = None
                    successful_clusters.append(cluster_name)
                else:  # For clusters marked 'success': False we do not emulate the collapse, so it fails
                    failed_clusters.append(cluster_name)

        # Start collapse and make it fail for all clusters on StorageRouter 2
        SSHClient._raise_exceptions[storagerouter_2.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        GenericController.collapse_arakoon()

        # Verify all log messages for each type of cluster
        generic_logs = Logger._logs.get('lib', {})
        for cluster_name in successful_clusters + failed_clusters + external_clusters:
            collect_msg = (
                'DEBUG',
                'Collecting info for cluster {0}'.format(cluster_name))
            unreachable_msg = (
                'ERROR',
                'Could not collapse any cluster on {0} (not reachable)'.format(
                    storagerouter_2.name))
            end_collapse_msg = (
                'DEBUG', 'Collapsing cluster {0} on {1} completed'.format(
                    cluster_name, storagerouter_1.ip))
            start_collapse_msg = ('DEBUG',
                                  'Collapsing cluster {0} on {1}'.format(
                                      cluster_name, storagerouter_1.ip))
            failed_collapse_msg = (
                'ERROR', 'Collapsing cluster {0} on {1} failed'.format(
                    cluster_name, storagerouter_1.ip))
            messages_to_validate = []
            if cluster_name in successful_clusters:
                assert_function = self.assertIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(unreachable_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(end_collapse_msg)
            elif cluster_name in failed_clusters:
                assert_function = self.assertIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(unreachable_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(failed_collapse_msg)
            else:
                assert_function = self.assertNotIn
                messages_to_validate.append(collect_msg)
                messages_to_validate.append(start_collapse_msg)
                messages_to_validate.append(end_collapse_msg)

            for severity, message in messages_to_validate:
                if assert_function == self.assertIn:
                    assert_message = 'Expected to find log message: {0}'.format(
                        message)
                else:
                    assert_message = 'Did not expect to find log message: {0}'.format(
                        message)
                assert_function(member=message,
                                container=generic_logs,
                                msg=assert_message)
                if assert_function == self.assertIn:
                    self.assertEqual(
                        first=severity,
                        second=generic_logs[message],
                        msg='Log message {0} is of severity {1}, expected {2}'.format(
                            message, generic_logs[message], severity))

        # Collapse should always have a 'finished' message since each cluster should be attempted to be collapsed
        for general_message in [
                'Arakoon collapse started', 'Arakoon collapse finished'
        ]:
            self.assertIn(member=general_message,
                          container=generic_logs,
                          msg='Expected to find log message: {0}'.format(
                              general_message))
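The MockedSSHClient pattern used in this test boils down to a per-IP dictionary that maps exact command strings to canned return values. A stripped-down, self-contained version of the idea (class and names hypothetical):

    class FakeSSHClient(object):
        run_returns = {}  # ip -> {command_string: canned_output}

        def __init__(self, ip):
            self.ip = ip

        def run(self, command):
            # Return the canned value for this exact command, fail otherwise.
            try:
                return FakeSSHClient.run_returns[self.ip][command]
            except KeyError:
                raise RuntimeError('Unexpected command: {0}'.format(command))

    FakeSSHClient.run_returns['10.0.0.1'] = {'arakoon --collapse-local 1 2 -config /tmp/cfg': None}
    assert FakeSSHClient('10.0.0.1').run('arakoon --collapse-local 1 2 -config /tmp/cfg') is None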
Example #5
    def add_arakoon(cluster_name,
                    storagerouter_ip,
                    cluster_basedir,
                    service_type=ServiceType.ARAKOON_CLUSTER_TYPES.FWK):
        """
        Adds an external arakoon cluster to a storagerouter

        :param cluster_name: name of the new arakoon cluster
        :type cluster_name: str
        :param service_type: type of plugin for arakoon (DEFAULT=ServiceType.ARAKOON_CLUSTER_TYPES.FWK)
            * FWK
            * ABM
            * NSM
        :type service_type: ovs.dal.hybrids.ServiceType.ARAKOON_CLUSTER_TYPES
        :param storagerouter_ip: ip of a storagerouter
        :type storagerouter_ip: str
        :param cluster_basedir: absolute path for the new arakoon cluster
        :type cluster_basedir: str
        :return: None
        """
        client = SSHClient(storagerouter_ip, username='******')

        # create required directories
        if not client.dir_exists(cluster_basedir):
            client.dir_create(cluster_basedir)

        # determine plugin
        if service_type == ServiceType.ARAKOON_CLUSTER_TYPES.FWK:
            plugins = None
        elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
            plugins = {
                AlbaController.ABM_PLUGIN: AlbaController.ALBA_VERSION_GET
            }
        elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
            plugins = {
                AlbaController.NSM_PLUGIN: AlbaController.ALBA_VERSION_GET
            }
        else:
            raise RuntimeError(
                "Incompatible Arakoon cluster type selected: {0}".format(
                    service_type))

        ArakoonSetup.LOGGER.info(
            "Starting creation of new arakoon cluster with name `{0}`, servicetype `{1}`, ip `{2}`, base_dir `{3}`"
            .format(cluster_name, service_type, storagerouter_ip,
                    cluster_basedir))
        arakoon_installer = ArakoonInstaller(cluster_name)
        arakoon_installer.create_cluster(
            cluster_type=service_type,
            ip=storagerouter_ip,
            base_dir=cluster_basedir,
            plugins=plugins,
            locked=False,
            internal=False,
            log_sinks=Logger.get_sink_path('automation_lib_arakoon_server'),
            crash_log_sinks=Logger.get_sink_path(
                'automation_lib_arakoon_server_crash'))
        if service_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
            client.run([
                'ln', '-s', '/usr/lib/alba/albamgr_plugin.cmxs',
                '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)
            ])
        elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
            client.run([
                'ln', '-s', '/usr/lib/alba/nsm_host_plugin.cmxs',
                '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)
            ])
        arakoon_installer.start_cluster()
        arakoon_installer.unclaim_cluster()
        ArakoonSetup.LOGGER.info(
            "Finished creation of new arakoon cluster with name `{0}`, servicetype `{1}`, ip `{2}`, base_dir `{3}`"
            .format(cluster_name, service_type, storagerouter_ip,
                    cluster_basedir))
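A hypothetical invocation creating an external ABM cluster with the helper above (all values are placeholders; add_arakoon is assumed to be a static method on ArakoonSetup):

    ArakoonSetup.add_arakoon(cluster_name='mycluster-abm',
                             storagerouter_ip='10.100.199.11',
                             cluster_basedir='/mnt/data',
                             service_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)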
Example #6
    def extend_arakoon(cluster_name,
                       master_storagerouter_ip,
                       storagerouter_ip,
                       cluster_basedir,
                       service_type=ServiceType.ARAKOON_CLUSTER_TYPES.FWK,
                       clustered_nodes=None):
        """
        Extends an existing external arakoon cluster to an additional storagerouter

        :param cluster_name: name of the already existing arakoon cluster
        :type cluster_name: str
        :param master_storagerouter_ip: master ip address of the existing arakoon cluster
                                        e.g. 10.100.199.11
        :type master_storagerouter_ip: str
        :param storagerouter_ip: ip of a new storagerouter to extend to
                                 e.g. 10.100.199.12
        :type storagerouter_ip: str
        :param cluster_basedir: absolute path for the new arakoon cluster
        :type cluster_basedir: str
        :param service_type: type of plugin for arakoon (DEFAULT=ServiceType.ARAKOON_CLUSTER_TYPES.FWK)
            * FWK
            * ABM
            * NSM
        :type service_type: ovs.dal.hybrids.ServiceType.ARAKOON_CLUSTER_TYPES
        :param clustered_nodes: nodes that are available for the arakoon cluster (including the node being extended to)
                                e.g. ['10.100.199.11', '10.100.199.12'] (DEFAULT=[])
        :type clustered_nodes: list
        :return: None
        """
        if clustered_nodes is None:
            clustered_nodes = []
        client = SSHClient(storagerouter_ip, username='******')

        # create required directories
        if not client.dir_exists(cluster_basedir):
            client.dir_create(cluster_basedir)

        ArakoonSetup.LOGGER.info(
            "Starting extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
            .format(cluster_name, master_storagerouter_ip, storagerouter_ip,
                    cluster_basedir))
        arakoon_installer = ArakoonInstaller(cluster_name)
        arakoon_installer.load()
        arakoon_installer.extend_cluster(
            new_ip=storagerouter_ip,
            base_dir=cluster_basedir,
            locked=False,
            log_sinks=Logger.get_sink_path('automation_lib_arakoon_server'),
            crash_log_sinks=Logger.get_sink_path(
                'automation_lib_arakoon_server_crash'))
        if service_type == ServiceType.ARAKOON_CLUSTER_TYPES.ABM:
            client.run([
                'ln', '-s', '/usr/lib/alba/albamgr_plugin.cmxs',
                '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)
            ])
        elif service_type == ServiceType.ARAKOON_CLUSTER_TYPES.NSM:
            client.run([
                'ln', '-s', '/usr/lib/alba/nsm_host_plugin.cmxs',
                '{0}/arakoon/{1}/db'.format(cluster_basedir, cluster_name)
            ])

        # check whether we need to restart the given nodes
        if len(clustered_nodes) != 0:
            ArakoonSetup.LOGGER.info(
                "Trying to restart all given nodes of arakoon cluster `{1}`: {0}".format(
                    clustered_nodes, cluster_name))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=storagerouter_ip)
            ArakoonSetup.LOGGER.info(
                "Finished restarting all given nodes of arakoon cluster `{1}`: {0}".format(
                    clustered_nodes, cluster_name))

        ArakoonSetup.LOGGER.info(
            "Finished extending arakoon cluster with name `{0}`, master_ip `{1}`, slave_ip `{2}`, base_dir `{3}`"
            .format(cluster_name, master_storagerouter_ip, storagerouter_ip,
                    cluster_basedir))
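And a matching hypothetical call extending that cluster to a second storagerouter and restarting both nodes (placeholder values again):

    ArakoonSetup.extend_arakoon(cluster_name='mycluster-abm',
                                master_storagerouter_ip='10.100.199.11',
                                storagerouter_ip='10.100.199.12',
                                cluster_basedir='/mnt/data',
                                service_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                                clustered_nodes=['10.100.199.11', '10.100.199.12'])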
Example #7
    def _voldrv_arakoon_checkup(create_cluster):
        """
        Ensures the voldrv Arakoon cluster exists and is extended to all eligible master nodes
        """
        def _add_service(service_storagerouter, arakoon_ports, service_name):
            """ Add a service to the storage router """
            new_service = Service()
            new_service.name = service_name
            new_service.type = service_type
            new_service.ports = arakoon_ports
            new_service.storagerouter = service_storagerouter
            new_service.save()
            return new_service

        current_ips = []
        current_services = []
        service_type = ServiceTypeList.get_by_name(
            ServiceType.SERVICE_TYPES.ARAKOON)
        cluster_name = Configuration.get(
            '/ovs/framework/arakoon_clusters').get('voldrv')
        if cluster_name is not None:
            arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster(
                cluster_name=cluster_name)
            for service in service_type.services:
                if service.name == arakoon_service_name:
                    current_services.append(service)
                    if service.is_internal is True:
                        current_ips.append(service.storagerouter.ip)

        all_sr_ips = [
            storagerouter.ip
            for storagerouter in StorageRouterList.get_slaves()
        ]
        available_storagerouters = {}
        for storagerouter in StorageRouterList.get_masters():
            storagerouter.invalidate_dynamics(['partition_config'])
            if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
                available_storagerouters[storagerouter] = DiskPartition(
                    storagerouter.partition_config[DiskPartition.ROLES.DB][0])
            all_sr_ips.append(storagerouter.ip)

        if create_cluster is True and len(current_services) == 0:  # Create new cluster
            metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(
                cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
            if metadata is None:  # No externally managed cluster found, we create one ourselves
                if not available_storagerouters:
                    raise RuntimeError(
                        'Could not find any Storage Router with a DB role')

                storagerouter, partition = available_storagerouters.items()[0]
                arakoon_voldrv_cluster = 'voldrv'
                arakoon_installer = ArakoonInstaller(
                    cluster_name=arakoon_voldrv_cluster)
                arakoon_installer.create_cluster(
                    cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                    ip=storagerouter.ip,
                    base_dir=partition.folder,
                    log_sinks=LogHandler.get_sink_path(
                        'arakoon-server_{0}'.format(arakoon_voldrv_cluster)),
                    crash_log_sinks=LogHandler.get_sink_path(
                        'arakoon-server-crash_{0}'.format(
                            arakoon_voldrv_cluster)))
                arakoon_installer.start_cluster()
                ports = arakoon_installer.ports[storagerouter.ip]
                metadata = arakoon_installer.metadata
                current_ips.append(storagerouter.ip)
            else:
                ports = []
                storagerouter = None

            cluster_name = metadata['cluster_name']
            Configuration.set('/ovs/framework/arakoon_clusters|voldrv',
                              cluster_name)
            StorageDriverController._logger.info(
                'Claiming {0} managed arakoon cluster: {1}'.format(
                    'externally' if storagerouter is None else 'internally',
                    cluster_name))
            StorageDriverController._configure_arakoon_to_volumedriver(
                cluster_name=cluster_name)
            current_services.append(
                _add_service(
                    service_storagerouter=storagerouter,
                    arakoon_ports=ports,
                    service_name=ArakoonInstaller.get_service_name_for_cluster(
                        cluster_name=cluster_name)))

        cluster_name = Configuration.get(
            '/ovs/framework/arakoon_clusters').get('voldrv')
        if cluster_name is None:
            return
        metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=cluster_name)
        if 0 < len(current_services) < len(available_storagerouters) and metadata['internal'] is True:
            for storagerouter, partition in available_storagerouters.iteritems():
                if storagerouter.ip in current_ips:
                    continue
                arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
                arakoon_installer.load()
                arakoon_installer.extend_cluster(
                    new_ip=storagerouter.ip,
                    base_dir=partition.folder,
                    log_sinks=LogHandler.get_sink_path(
                        'arakoon-server_{0}'.format(cluster_name)),
                    crash_log_sinks=LogHandler.get_sink_path(
                        'arakoon-server-crash_{0}'.format(cluster_name)))
                _add_service(
                    service_storagerouter=storagerouter,
                    arakoon_ports=arakoon_installer.ports[storagerouter.ip],
                    service_name=ArakoonInstaller.get_service_name_for_cluster(
                        cluster_name=cluster_name))
                current_ips.append(storagerouter.ip)
                arakoon_installer.restart_cluster_after_extending(
                    new_ip=storagerouter.ip)
            StorageDriverController._configure_arakoon_to_volumedriver(
                cluster_name=cluster_name)
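The checkup's control flow reduces to two decisions: create when no voldrv service exists yet (claiming an unused externally managed cluster if one is available), then extend while fewer master nodes run the service than are eligible. A schematic, self-contained restatement of that logic (names hypothetical):

    def plan_voldrv_actions(create_cluster, current_count, available_count,
                            external_available, internal):
        # Sketch only: returns the actions the checkup would take, in order.
        actions = []
        if create_cluster and current_count == 0:
            actions.append('claim_external' if external_available else 'create_internal')
            current_count = 1
        if 0 < current_count < available_count and internal:
            actions.append('extend_to_remaining_masters')
        return actions

    assert plan_voldrv_actions(True, 0, 3, False, True) == ['create_internal',
                                                            'extend_to_remaining_masters']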
Example #8
    def test_node_config_checkup(self):
        """
        Validates correct working of cluster registry checkup
        """
        base_structure = {
            '1': {
                'vrouter_id': '1',
                'message_host': '10.0.1.1',
                'message_port': 1,
                'xmlrpc_host': '10.0.0.1',
                'xmlrpc_port': 2,
                'failovercache_host': '10.0.1.1',
                'failovercache_port': 3,
                'network_server_uri': 'tcp://10.0.1.1:4',
                'node_distance_map': None
            },
            '2': {
                'vrouter_id': '2',
                'message_host': '10.0.1.2',
                'message_port': 1,
                'xmlrpc_host': '10.0.0.2',
                'xmlrpc_port': 2,
                'failovercache_host': '10.0.1.2',
                'failovercache_port': 3,
                'network_server_uri': 'tcp://10.0.1.2:4',
                'node_distance_map': None
            }
        }

        def _validate_node_config(_config, _expected_map):
            expected = copy.deepcopy(base_structure[_config.vrouter_id])
            expected['node_distance_map'] = _expected_map[_config.vrouter_id]
            self.assertDictEqual(
                expected, {
                    'vrouter_id': _config.vrouter_id,
                    'message_host': _config.message_host,
                    'message_port': _config.message_port,
                    'xmlrpc_host': _config.xmlrpc_host,
                    'xmlrpc_port': _config.xmlrpc_port,
                    'failovercache_host': _config.failovercache_host,
                    'failovercache_port': _config.failovercache_port,
                    'network_server_uri': _config.network_server_uri,
                    'node_distance_map': _config.node_distance_map
                })

        structure = DalHelper.build_dal_structure({
            'vpools': [1],
            'domains': [1, 2],
            'storagerouters': [1, 2],
            'storagedrivers': [(1, 1, 1), (2, 1, 2)],  # (<id>, <vpool_id>, <storagerouter_id>)
            'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        })
        storagerouters = structure['storagerouters']
        vpool = structure['vpools'][1]
        arakoon_installer = ArakoonInstaller(cluster_name='voldrv')
        arakoon_installer.create_cluster(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
            ip=storagerouters[1].ip,
            base_dir='/tmp')

        # Initial run, it will now be configured
        StorageRouterClient.node_config_recordings = []
        result = StorageDriverController.cluster_registry_checkup()
        self.assertDictEqual(result,
                             {vpool.guid: {
                                 'success': True,
                                 'changes': True
                             }})
        self.assertListEqual(
            sorted(StorageRouterClient.node_config_recordings), ['1', '2'])
        expected_map = {
            '1': {
                '2': StorageDriver.DISTANCES.NEAR
            },
            '2': {
                '1': StorageDriver.DISTANCES.NEAR
            }
        }
        configs = vpool.clusterregistry_client.get_node_configs()
        for config in configs:
            _validate_node_config(config, expected_map)

        # Running it again should not change anything
        StorageRouterClient.node_config_recordings = []
        result = StorageDriverController.cluster_registry_checkup()
        self.assertDictEqual(result,
                             {vpool.guid: {
                                 'success': True,
                                 'changes': False
                             }})
        self.assertListEqual(
            sorted(StorageRouterClient.node_config_recordings), [])
        expected_map = {
            '1': {
                '2': StorageDriver.DISTANCES.NEAR
            },
            '2': {
                '1': StorageDriver.DISTANCES.NEAR
            }
        }
        configs = vpool.clusterregistry_client.get_node_configs()
        for config in configs:
            _validate_node_config(config, expected_map)

        # Validate some error paths
        domain = structure['domains'][2]
        junction = structure['storagerouters'][1].domains[0]
        junction.domain = domain
        junction.save()
        vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(
            vpool.guid)
        StorageRouterClient.exceptions['server_revision'] = {
            vpool_config_path: Exception('ClusterNotReachableException')
        }
        StorageRouterClient.node_config_recordings = []
        result = StorageDriverController.cluster_registry_checkup()
        self.assertDictEqual(result,
                             {vpool.guid: {
                                 'success': True,
                                 'changes': True
                             }})
        self.assertListEqual(
            sorted(StorageRouterClient.node_config_recordings), ['2'])
        expected_map = {
            '1': {
                '2': StorageDriver.DISTANCES.INFINITE
            },
            '2': {
                '1': StorageDriver.DISTANCES.INFINITE
            }
        }
        configs = vpool.clusterregistry_client.get_node_configs()
        for config in configs:
            _validate_node_config(config, expected_map)
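The _validate_node_config helper follows a copy-and-override pattern: deep-copy the static base structure, patch the one field that varies per scenario, then compare against the actual object. The pattern in miniature (values hypothetical):

    import copy

    base = {'host': '10.0.1.1', 'port': 1, 'node_distance_map': None}

    def expected_with(distance_map):
        expected = copy.deepcopy(base)  # never mutate the shared base
        expected['node_distance_map'] = distance_map
        return expected

    assert expected_with({'2': 0})['node_distance_map'] == {'2': 0}
    assert base['node_distance_map'] is None  # base left untouched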
Example #9
    def demote_node(cluster_ip,
                    master_ip,
                    ip_client_map,
                    unique_id,
                    unconfigure_memcached,
                    unconfigure_rabbitmq,
                    offline_nodes=None):
        """
        Demotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if offline_nodes is None:
            offline_nodes = []

        if unconfigure_memcached is True and len(offline_nodes) == 0:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for demoting a node.'
                )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        shrink = False
        if cluster_ip in master_node_ips:
            shrink = True
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'EXTRA'
        storagerouter.save()

        offline_node_ips = [node.ip for node in offline_nodes]
        if arakoon_metadata['internal'] is True and shrink is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Leaving Arakoon {0} cluster'.format(
                            arakoon_cluster_name))
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.shrink_cluster(removal_ip=cluster_ip,
                                             offline_nodes=offline_node_ips)
            arakoon_installer.restart_cluster_after_shrinking()
        try:
            external_config = Configuration.get(
                '/ovs/framework/external_config')
            if external_config is None and shrink is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Leaving Arakoon config cluster')
                arakoon_installer = ArakoonInstaller(cluster_name='config')
                arakoon_installer.load(ip=master_node_ips[0])
                arakoon_installer.shrink_cluster(
                    removal_ip=cluster_ip, offline_nodes=offline_node_ips)
                arakoon_installer.restart_cluster_after_shrinking()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to leave configuration cluster', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        try:
            if unconfigure_memcached is True:
                endpoints = Configuration.get(
                    '/ovs/framework/memcache|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 11211)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
            if unconfigure_rabbitmq is True:
                endpoints = Configuration.get(
                    '/ovs/framework/messagequeue|endpoints')
                endpoint = '{0}:{1}'.format(cluster_ip, 5672)
                if endpoint in endpoints:
                    endpoints.remove(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)
        except Exception as ex:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages=['\nFailed to update configurations', ex],
                        loglevel='exception')

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')
            remaining_nodes = ip_client_map.keys()[:]
            if cluster_ip in remaining_nodes:
                remaining_nodes.remove(cluster_ip)

            PersistentFactory.store = None
            VolatileFactory.store = None

            for service in storagerouter.services:
                if service.name == 'arakoon-ovsdb':
                    service.delete()

        target_client = None
        if storagerouter in offline_nodes:
            if unconfigure_rabbitmq is True:
                Toolbox.log(
                    logger=NodeTypeController._logger,
                    messages='Removing/unconfiguring offline RabbitMQ node')
                client = ip_client_map[master_ip]
                try:
                    client.run([
                        'rabbitmqctl', 'forget_cluster_node',
                        'rabbit@{0}'.format(storagerouter.name)
                    ])
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to forget RabbitMQ cluster node',
                                    ex
                                ],
                                loglevel='exception')
        else:
            target_client = ip_client_map[cluster_ip]
            if unconfigure_rabbitmq is True:
                Toolbox.log(logger=NodeTypeController._logger,
                            messages='Removing/unconfiguring RabbitMQ')
                try:
                    if service_manager.has_service('rabbitmq-server',
                                                   client=target_client):
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)
                        target_client.run(['rabbitmq-server', '-detached'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop_app'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'reset'])
                        time.sleep(5)
                        target_client.run(['rabbitmqctl', 'stop'])
                        time.sleep(5)
                        target_client.file_unlink(
                            "/var/lib/rabbitmq/.erlang.cookie")
                        ServiceFactory.change_service_state(
                            target_client, 'rabbitmq-server', 'stop',
                            NodeTypeController._logger)  # To be sure
                except Exception as ex:
                    Toolbox.log(logger=NodeTypeController._logger,
                                messages=[
                                    '\nFailed to remove/unconfigure RabbitMQ',
                                    ex
                                ],
                                loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Stopping services')
            services = ['memcached', 'rabbitmq-server']
            if unconfigure_rabbitmq is False:
                services.remove('rabbitmq-server')
            if unconfigure_memcached is False:
                services.remove('memcached')
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Stopping service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                    except Exception as ex:
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to stop service {0}'.format(service), ex
                            ],
                            loglevel='exception')

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Removing services')
            services = [
                'scheduled-tasks', 'webapp-api', 'volumerouter-consumer'
            ]
            for service in services:
                if service_manager.has_service(service, client=target_client):
                    Toolbox.log(
                        logger=NodeTypeController._logger,
                        messages='Removing service {0}'.format(service))
                    try:
                        ServiceFactory.change_service_state(
                            target_client, service, 'stop',
                            NodeTypeController._logger)
                        service_manager.remove_service(service,
                                                       client=target_client)
                    except Exception as ex:
                        Toolbox.log(
                            logger=NodeTypeController._logger,
                            messages=[
                                '\nFailed to remove service {0}'.format(service),
                                ex
                            ],
                            loglevel='exception')

            if service_manager.has_service('workers', client=target_client):
                service_manager.add_service(
                    name='workers',
                    client=target_client,
                    params={'WORKER_QUEUE': '{0}'.format(unique_id)})
        try:
            NodeTypeController._configure_amqp_to_volumedriver()
        except Exception as ex:
            Toolbox.log(
                logger=NodeTypeController._logger,
                messages=['\nFailed to configure AMQP to Storage Driver', ex],
                loglevel='exception')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map,
            logger=NodeTypeController._logger,
            offline_node_ips=offline_node_ips)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='demote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip,
                             offline_node_ips=offline_node_ips):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                logger=NodeTypeController._logger,
                offline_node_ips=offline_node_ips)

        if storagerouter not in offline_nodes:
            target_client = ip_client_map[cluster_ip]
            node_name, _ = target_client.get_hostname()
            if NodeTypeController.avahi_installed(
                    client=target_client,
                    logger=NodeTypeController._logger) is True:
                NodeTypeController.configure_avahi(
                    client=target_client,
                    node_name=node_name,
                    node_type='extra',
                    logger=NodeTypeController._logger)
        Configuration.set(
            '/ovs/framework/hosts/{0}/type'.format(storagerouter.machine_id),
            'EXTRA')

        if target_client is not None and target_client.file_exists(
                '/tmp/ovs_rollback'):
            target_client.file_write('/tmp/ovs_rollback', 'rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Demote complete',
                    title=True)
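The memcached and RabbitMQ de-registration inside 'Update configurations' follows one pattern: read the endpoint list, drop this node's <ip>:<port> entry, write the list back. The pattern isolated as a self-contained sketch (helper name hypothetical):

    def without_endpoint(endpoints, ip, port):
        # Returns a new list without this node's endpoint; idempotent.
        endpoint = '{0}:{1}'.format(ip, port)
        return [e for e in endpoints if e != endpoint]

    assert without_endpoint(['10.0.0.1:11211', '10.0.0.2:11211'], '10.0.0.1', 11211) == ['10.0.0.2:11211']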
Example #10
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id,
                     configure_memcached, configure_rabbitmq):
        """
        Promotes a given node
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for promoting a node.'
                )

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon configuration cluster')
            arakoon_installer = ArakoonInstaller(cluster_name='config')
            arakoon_installer.load(ip=master_ip)
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            service_manager.register_service(
                node_name=machine_id,
                service_metadata=arakoon_installer.service_metadata[cluster_ip]
            )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

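        # Only an internally managed OVS DB Arakoon cluster is extended to the promoted node; externally managed clusters are left untouched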
        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon OVS DB cluster')
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            arakoon_ports = arakoon_installer.ports[cluster_ip]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(
                client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client,
                                        node_type='master',
                                        logger=NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get(
                '/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')
            PersistentFactory.store = None
            VolatileFactory.store = None
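            # Dropping the cached stores forces the factories to reconnect, now through the freshly extended cluster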

            if 'arakoon-ovsdb' not in [
                    s.name for s in ServiceList.get_services() if
                    s.is_internal is False or s.storagerouter.ip == cluster_ip
            ]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(
                client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Copying RabbitMQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
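            # Standard RabbitMQ clustering sequence: the app must be stopped while joining, after which the service manager starts the server again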
            target_client.run([
                'rabbitmqctl', 'join_cluster',
                'rabbit@{0}'.format(master_hostname)
            ])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            ServiceFactory.change_service_state(target_client,
                                                'rabbitmq-server', 'start',
                                                NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(
                client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if service_manager.has_service(service, client=target_client):
                ServiceFactory.change_service_state(target_client, service,
                                                    'start',
                                                    NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map, logger=NodeTypeController._logger)

        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(
                client=target_client,
                logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(
                client=target_client,
                node_name=node_name,
                node_type='master',
                logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id),
                          'MASTER')
        target_client.run(
            ['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set(
            '/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id),
            True)

        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promote complete')
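A hypothetical invocation of promote_node; the IP addresses, the root username and the way ip_client_map and unique_id are built here are assumptions for illustration, not part of the original example:

from ovs.extensions.generic.sshclient import SSHClient
from ovs.extensions.generic.system import System

master_ip = '10.100.1.1'   # an existing master node
cluster_ip = '10.100.1.2'  # the node being promoted
ip_client_map = dict((ip, SSHClient(ip, username='root')) for ip in (master_ip, cluster_ip))
NodeTypeController.promote_node(cluster_ip=cluster_ip,
                                master_ip=master_ip,
                                ip_client_map=ip_client_map,
                                unique_id=System.get_my_machine_id(ip_client_map[cluster_ip]),
                                configure_memcached=True,
                                configure_rabbitmq=True)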
Example #11
    def test_alba_arakoon_checkup(self):
        """
        Validates whether the ALBA Arakoon checkup works (Manual and Scheduled)
        """
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})

        #############################
        # SCHEDULED_ARAKOON_CHECKUP #
        #############################
        # Create an ABM and NSM cluster for ALBA Backend 1 and do some basic validations
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        abm_cluster_name = '{0}-abm'.format(ab_1.name)
        nsm_cluster_name = '{0}-nsm_0'.format(ab_1.name)
        arakoon_clusters = sorted(Configuration.list('/ovs/arakoon'))
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)

        abm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=abm_cluster_name)
        nsm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=nsm_cluster_name)
        self.assertTrue(expr=abm_metadata['in_use'])
        self.assertTrue(expr=nsm_metadata['in_use'])

        # Run the scheduled Arakoon checkup and validate that the amount of Arakoon clusters did not change
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Create 2 additional StorageRouters
        srs = DalHelper.build_dal_structure(
            structure={'storagerouters': [2, 3]},
            previous_structure=ovs_structure)['storagerouters']
        sr_2 = srs[2]
        sr_3 = srs[3]

        # Run scheduled checkup again and do some validations
        MockedSSHClient._run_returns[sr_2.ip] = {}
        MockedSSHClient._run_returns[sr_3.ip] = {}
        MockedSSHClient._run_returns[sr_2.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_3/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_2.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_2.machine_id)] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_3.machine_id)] = None
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services),
                         second=3)  # Gone up from 1 to 3
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)  # Still 1 since the NSM checkup hasn't run yet

        # Make sure 1 StorageRouter is unreachable
        SSHClient._raise_exceptions[sr_3.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        alba_logs = Logger._logs.get('lib', [])
        self.assertIn(
            member='Storage Router with IP {0} is not reachable'.format(
                sr_3.ip),
            container=alba_logs)

        ##########################
        # MANUAL_ARAKOON_CHECKUP #
        ##########################
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        # Run the manual Arakoon checkup and validate that the amount of Arakoon clusters did not change
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster=None)
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=arakoon_clusters)
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Test some error paths
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['no_abm_cluster_passed'])
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[],
                abm_cluster='no_nsm_clusters_passed')
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[nsm_cluster_name],
                abm_cluster=abm_cluster_name)
        self.assertEqual(first=raise_info.exception.message,
                         second='Cluster {0} has already been claimed'.format(
                             abm_cluster_name))
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['non-existing-nsm-cluster'],
                abm_cluster='non-existing-abm-cluster')
        self.assertEqual(
            first=raise_info.exception.message,
            second=
            'Could not find an Arakoon cluster with name: non-existing-abm-cluster'
        )

        # Recreate ALBA Backend with Arakoon clusters
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]

        # Create some Arakoon clusters to be claimed by the manual checkup
        for cluster_name, cluster_type in {
                'manual-abm-1': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-abm-2': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-nsm-1': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-2': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-3': ServiceType.ARAKOON_CLUSTER_TYPES.NSM
        }.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(
                cluster_type=cluster_type,
                ip=sr_1.ip,
                base_dir=DalHelper.CLUSTER_DIR.format(cluster_name),
                internal=False)
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid,
            nsm_clusters=['manual-nsm-1', 'manual-nsm-3'],
            abm_cluster='manual-abm-2')

        # Validate the correct clusters have been claimed by the manual checkup
        unused_abms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)
        unused_nsms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM)
        self.assertEqual(first=len(unused_abms), second=1)
        self.assertEqual(first=len(unused_nsms), second=1)
        self.assertEqual(first=unused_abms[0]['cluster_name'],
                         second='manual-abm-1')
        self.assertEqual(first=unused_nsms[0]['cluster_name'],
                         second='manual-nsm-2')
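The mocks above follow a simple per-IP registry convention: MockedSSHClient.run() is assumed to look up the exact command string in _run_returns[ip] and return the mapped value instead of executing anything. A minimal sketch with a made-up command and output:

MockedSSHClient._run_returns['10.100.1.4'] = {
    'arakoon --version': 'Arakoon 1.9 (mocked)',  # hypothetical command/output pair
}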
Example #12
    def ensure_nsm_clusters_load(cls,
                                 alba_backend,
                                 nsms_per_storagerouter=None,
                                 min_internal_nsms=1,
                                 external_nsm_cluster_names=None,
                                 version_str=None,
                                 ssh_clients=None):
        # type: (AlbaBackend, Optional[Dict[StorageRouter, int]], Optional[int], Optional[List[str]], Optional[str], Optional[Dict[StorageRouter, SSHClient]]) -> None
        """
        Ensure that no NSM cluster is overloaded
        :param alba_backend: Alba Backend to ensure NSM Cluster load for
        :type alba_backend: AlbaBackend
        :param nsms_per_storagerouter: Amount of NSMs mapped by StorageRouter
        :type nsms_per_storagerouter: Dict[StorageRouter, int]
        :param min_internal_nsms: Minimum amount of NSM hosts that need to be provided
        :type min_internal_nsms: int
        :param external_nsm_cluster_names: Information about the additional clusters to claim (only for externally managed Arakoon clusters)
        :type external_nsm_cluster_names: list
        :param version_str: Alba version string
        :type version_str: str
        :param ssh_clients: SSHClients to use
        :type ssh_clients: Dict[StorageRouter, SSHClient]
        :return: None
        :rtype: NoneType
        """
        if ssh_clients is None:
            ssh_clients = {}
        if external_nsm_cluster_names is None:
            external_nsm_cluster_names = []

        if nsms_per_storagerouter is None:
            nsms_per_storagerouter = cls.get_nsms_per_storagerouter(alba_backend)
        version_str = version_str or AlbaArakoonInstaller.get_alba_version_string()
        nsm_loads = cls.get_nsm_loads(alba_backend)
        internal = AlbaArakoonInstaller.is_internally_managed(alba_backend)
        abm_cluster_name = alba_backend.abm_cluster.name

        safety = Configuration.get(
            '/ovs/framework/plugins/alba/config|nsm.safety')
        maxload = Configuration.get(
            '/ovs/framework/plugins/alba/config|nsm.maxload')

        overloaded = min(nsm_loads.values()) >= maxload
        if not overloaded:
            # At least 1 NSM is not overloaded yet
            AlbaArakoonController._logger.debug(
                'ALBA Backend {0} - NSM load OK'.format(alba_backend.name))
            if internal:
                # Load is OK; only make sure the minimum amount of internal NSMs is deployed
                nsms_to_add = max(0, min_internal_nsms - len(nsm_loads))
            else:
                nsms_to_add = len(external_nsm_cluster_names)
            if nsms_to_add == 0:
                return
        else:
            AlbaArakoonController._logger.warning(
                'ALBA Backend {0} - NSM load is NOT OK'.format(
                    alba_backend.name))
            if internal:
                # When load is not OK, deploy at least 1 additional NSM
                nsms_to_add = max(1, min_internal_nsms - len(nsm_loads))
            else:
                # For externally managed clusters we only claim the specified clusters, if none provided, we just log it
                nsms_to_add = len(external_nsm_cluster_names)
                if nsms_to_add == 0:
                    cls._logger.critical(
                        'ALBA Backend {0} - All NSM clusters are overloaded'.
                        format(alba_backend.name))
                    return

        # Deploy new (internal) or claim existing (external) NSM clusters
        cls._logger.debug(
            'ALBA Backend {0} - Currently {1} NSM cluster{2}'.format(
                alba_backend.name, len(nsm_loads),
                '' if len(nsm_loads) == 1 else 's'))
        AlbaArakoonController._logger.debug(
            'ALBA Backend {0} - Trying to add {1} NSM cluster{2}'.format(
                alba_backend.name, nsms_to_add,
                '' if nsms_to_add == 1 else 's'))
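        # NSM cluster numbers continue from the highest number currently in use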
        base_number = max(nsm_loads.keys()) + 1
        for index, number in enumerate(
                xrange(base_number, base_number + nsms_to_add)):
            if not internal:
                # External clusters
                master_client = None
                if not ssh_clients:
                    for storagerouter in StorageRouterList.get_masters():
                        try:
                            master_client = SSHClient(storagerouter)
                        except UnableToConnectException:
                            cls._logger.warning(
                                'StorageRouter {0} with IP {1} is not reachable'
                                .format(storagerouter.name, storagerouter.ip))
                else:
                    for storagerouter, ssh_client in ssh_clients.iteritems():
                        if storagerouter.node_type == 'MASTER':
                            master_client = ssh_client
                if not master_client:
                    raise ValueError('Could not find an online master node')
                # @todo this might raise an IndexError when external_nsm_cluster_names has fewer entries than nsms_to_add
                nsm_cluster_name = external_nsm_cluster_names[index]
                cls._logger.debug(
                    'ALBA Backend {0} - Claiming NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))
                metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(
                    cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                    cluster_name=nsm_cluster_name)
                if metadata is None:
                    cls._logger.critical(
                        'ALBA Backend {0} - NSM cluster with name {1} could not be found'
                        .format(alba_backend.name, nsm_cluster_name))
                    continue

                cls._logger.debug(
                    'ALBA Backend {0} - Modeling services'.format(
                        alba_backend.name))
                AlbaArakoonInstaller.model_arakoon_service(
                    alba_backend=alba_backend,
                    cluster_name=nsm_cluster_name,
                    number=number)
                cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(
                    alba_backend.name))
                NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                          nsm_name=nsm_cluster_name,
                                          ip=master_client.ip)
                AlbaArakoonController._logger.debug(
                    'ALBA Backend {0} - Extended cluster'.format(
                        alba_backend.name))
            else:
                # Internal clusters
                nsm_cluster_name = '{0}-nsm_{1}'.format(
                    alba_backend.name, number)
                cls._logger.debug(
                    'ALBA Backend {0} - Adding NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))

                # One of the NSM nodes is overloaded. This means the complete NSM is considered overloaded
                # Figure out which StorageRouters are the least occupied
                loads = sorted(nsms_per_storagerouter.values())[:safety]
                storagerouters = []
                for storagerouter, load in nsms_per_storagerouter.iteritems():
                    if load in loads:
                        storagerouters.append(storagerouter)
                    if len(storagerouters) == safety:
                        break
                # Creating a new NSM cluster
                for sub_index, storagerouter in enumerate(storagerouters):
                    nsms_per_storagerouter[storagerouter] += 1
                    partition = AlbaArakoonInstaller.get_db_partition(
                        storagerouter)
                    arakoon_installer = ArakoonInstaller(
                        cluster_name=nsm_cluster_name)
                    # @todo Use deploy and extend code. (Disable register nsm in those parts)
                    if sub_index == 0:
                        arakoon_installer.create_cluster(
                            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                            ip=storagerouter.ip,
                            base_dir=partition.folder,
                            plugins={NSM_PLUGIN: version_str})
                    else:
                        cls._logger.debug(
                            'ALBA Backend {0} - Extending NSM cluster {1}'.
                            format(alba_backend.name, nsm_cluster_name))
                        arakoon_installer.load()
                        arakoon_installer.extend_cluster(
                            new_ip=storagerouter.ip,
                            base_dir=partition.folder,
                            plugins={NSM_PLUGIN: version_str})
                    cls._logger.debug(
                        'ALBA Backend {0} - Linking plugins'.format(
                            alba_backend.name))
                    ssh_client = ssh_clients.get(storagerouter) or SSHClient(
                        storagerouter)
                    AlbaArakoonInstaller.link_plugins(
                        client=ssh_client,
                        data_dir=partition.folder,
                        plugins=[NSM_PLUGIN],
                        cluster_name=nsm_cluster_name)
                    cls._logger.debug(
                        'ALBA Backend {0} - Modeling services'.format(
                            alba_backend.name))
                    AlbaArakoonInstaller.model_arakoon_service(
                        alba_backend=alba_backend,
                        cluster_name=nsm_cluster_name,
                        ports=arakoon_installer.ports[storagerouter.ip],
                        storagerouter=storagerouter,
                        number=number)
                    if sub_index == 0:
                        cls._logger.debug(
                            'ALBA Backend {0} - Starting cluster'.format(
                                alba_backend.name))
                        arakoon_installer.start_cluster()
                    else:
                        AlbaArakoonController._logger.debug(
                            'ALBA Backend {0} - Restarting cluster'.format(
                                alba_backend.name))
                        arakoon_installer.restart_cluster_after_extending(
                            new_ip=storagerouter.ip)
                cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(
                    alba_backend.name))
                NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                          nsm_name=nsm_cluster_name,
                                          ip=storagerouters[0].ip)
                cls._logger.debug(
                    'ALBA Backend {0} - Added NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))
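A toy recreation of the load decision at the top of ensure_nsm_clusters_load, with made-up numbers; in the real method maxload and nsm_loads come from Configuration and get_nsm_loads, and this only covers the internally managed branch:

nsm_loads = {0: 10, 1: 7}  # NSM cluster number -> current load
maxload = 10               # /ovs/framework/plugins/alba/config|nsm.maxload
min_internal_nsms = 1

overloaded = min(nsm_loads.values()) >= maxload  # False: cluster 1 still has room
if not overloaded:
    nsms_to_add = max(0, min_internal_nsms - len(nsm_loads))  # here 0: nothing to deploy
else:
    nsms_to_add = max(1, min_internal_nsms - len(nsm_loads))  # always at least 1 extra NSM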
Example #13
    def test_nsm_checkup_external(self):
        """
        Validates whether the NSM checkup works for externally managed Arakoon clusters
        """
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

        structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
        alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

        alba_backend = alba_structure['alba_backends'][1]
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]

        # Validate some logic for externally managed arakoons during NSM checkup
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(external_nsm_cluster_names=['test'])  # No ALBA Backend specified
        self.assertEqual(first=str(raise_info.exception), second='Additional NSMs can only be configured for a specific ALBA Backend')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, min_internal_nsms=2, external_nsm_cluster_names=['test'])
        self.assertEqual(first=str(raise_info.exception), second="'min_internal_nsms' and 'external_nsm_cluster_names' are mutually exclusive")
        with self.assertRaises(ValueError) as raise_info:
            # noinspection PyTypeChecker
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names={})  # NSM cluster names must be a list
        self.assertEqual(first=str(raise_info.exception), second="'external_nsm_cluster_names' must be of type 'list'")
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=['non-existing-cluster'])  # non-existing cluster names should raise
        self.assertEqual(first=str(raise_info.exception), second="Arakoon cluster with name non-existing-cluster does not exist")

        # Create an external ABM and NSM Arakoon cluster
        external_abm_1 = 'backend_1-abm'
        external_nsm_1 = 'backend_1-nsm_0'
        external_nsm_2 = 'backend_1-nsm_1'
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type, ip=storagerouter_1.ip, base_dir='/tmp', internal=False)
            arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir='/tmp')
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))

        # Let `add_cluster` claim the externally managed clusters and model the services
        Logger._logs = {}
        AlbaController.add_cluster(alba_backend_guid=alba_backend.guid,
                                   abm_cluster=external_abm_1,
                                   nsm_clusters=[external_nsm_1])  # Only claim external_nsm_1
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False if cluster_name == external_nsm_2 else True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'NSM load OK' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                           ['add_nsm_host', 'backend_1-nsm_0'],
                                                                           ['update_maintenance_config', '--eviction-type-random'],
                                                                           ['update_maintenance_config', 'enable-auto-cleanup-deleted-namespaces-days']])

        # add_cluster already invokes an NSM checkup, so nothing should have changed
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Overload the only NSM and run NSM checkup. This should log a critical message, but change nothing
        VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25
        Logger._logs = {}
        AlbaArakoonController.nsm_checkup()
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'All NSM clusters are overloaded' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Validate that a maximum of 50 NSMs can be deployed
        current_nsms = [nsm_cluster.number for nsm_cluster in alba_backend.nsm_clusters]
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_nsm_clusters': [(1, 50)]},  # (<alba_backend_id>, <amount_of_nsm_clusters>)
            previous_structure=alba_structure
        )
        # Try to add 1 additional NSM
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=str(raise_info.exception), second='The maximum of 50 NSM Arakoon clusters will be exceeded. Amount of clusters that can be deployed for this ALBA Backend: 0')

        # Remove the unused NSM clusters again
        for nsm_cluster in alba_structure['alba_nsm_clusters'][1][len(current_nsms):]:
            for nsm_service in nsm_cluster.nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()
            nsm_cluster.delete()

        # Try to add a previously claimed NSM cluster
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_1])  # The provided cluster_name to claim has already been claimed
        self.assertEqual(first=str(raise_info.exception), second='Some of the provided cluster_names have already been claimed before')

        # Add a 2nd NSM cluster
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=2, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[1].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[1].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[['add_nsm_host', 'backend_1-nsm_1']], list2=VirtualAlbaBackend.run_log['backend_1-abm'])
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))
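Condensed, the external-cluster lifecycle this test exercises is: deploy a cluster, deliberately leave it unclaimed, then let a later checkup claim it. A sketch with a hypothetical cluster name:

cluster_name = 'backend_1-nsm_2'  # hypothetical; must exist as an unclaimed NSM cluster
arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
arakoon_installer.create_cluster(cluster_type='NSM', ip=storagerouter_1.ip, base_dir='/tmp', internal=False)
arakoon_installer.start_cluster()
arakoon_installer.unclaim_cluster()  # leaves 'in_use' False so a checkup may claim it
AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid,
                                  external_nsm_cluster_names=[cluster_name])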