示例#1
0
    def add_arakoon(cluster_name,
                    storagerouter_ip,
                    cluster_basedir,
                    service_type=ServiceType.ARAKOON_CLUSTER_TYPES.FWK):
        """
        Create and start an externally managed Arakoon cluster on a StorageRouter.

        The base directory is created over SSH when it does not exist yet. For ABM and NSM
        clusters the matching ALBA plugin is registered and symlinked into the cluster's db
        directory before the cluster is started. The cluster is unclaimed after it has started.

        :param cluster_name: name of the new arakoon cluster
        :type cluster_name: str
        :param storagerouter_ip: ip of a storagerouter
        :type storagerouter_ip: str
        :param cluster_basedir: absolute path for the new arakoon cluster
        :type cluster_basedir: str
        :param service_type: type of plugin for arakoon (DEFAULT=ServiceType.ARAKOON_CLUSTER_TYPES.FWK)
            * FWK
            * ABM
            * NSM
        :type service_type: ovs.dal.hybrids.ServiceType.ARAKOON_CLUSTER_TYPES
        :raises RuntimeError: when service_type is not one of FWK, ABM or NSM
        :return: None
        """
        ssh_client = SSHClient(storagerouter_ip, username='******')

        # Make sure the base directory is present on the target node
        if not ssh_client.dir_exists(cluster_basedir):
            ssh_client.dir_create(cluster_basedir)

        # Select the plugin configuration and the plugin library to symlink (if any)
        cluster_types = ServiceType.ARAKOON_CLUSTER_TYPES
        plugin_library = None
        if service_type == cluster_types.FWK:
            plugins = None
        elif service_type == cluster_types.ABM:
            plugins = {AlbaController.ABM_PLUGIN: AlbaController.ALBA_VERSION_GET}
            plugin_library = '/usr/lib/alba/albamgr_plugin.cmxs'
        elif service_type == cluster_types.NSM:
            plugins = {AlbaController.NSM_PLUGIN: AlbaController.ALBA_VERSION_GET}
            plugin_library = '/usr/lib/alba/nsm_host_plugin.cmxs'
        else:
            raise RuntimeError(
                "Incompatible Arakoon cluster type selected: {0}".format(
                    service_type))

        log_arguments = (cluster_name, service_type, storagerouter_ip,
                         cluster_basedir)
        ArakoonSetup.LOGGER.info(
            "Starting creation of new arakoon cluster with name `{0}`, servicetype `{1}`, ip `{2}`, base_dir `{3}`"
            .format(*log_arguments))

        installer = ArakoonInstaller(cluster_name)
        installer.create_cluster(
            cluster_type=service_type,
            ip=storagerouter_ip,
            base_dir=cluster_basedir,
            plugins=plugins,
            locked=False,
            internal=False,
            log_sinks=Logger.get_sink_path('automation_lib_arakoon_server'),
            crash_log_sinks=Logger.get_sink_path(
                'automation_lib_arakoon_server_crash'))

        # The plugin has to be linked into the db directory before the cluster is started
        if plugin_library is not None:
            db_directory = '{0}/arakoon/{1}/db'.format(cluster_basedir,
                                                       cluster_name)
            ssh_client.run(['ln', '-s', plugin_library, db_directory])

        installer.start_cluster()
        installer.unclaim_cluster()
        ArakoonSetup.LOGGER.info(
            "Finished creation of new arakoon cluster with name `{0}`, servicetype `{1}`, ip `{2}`, base_dir `{3}`"
            .format(*log_arguments))
示例#2
0
    def test_arakoon_collapse(self):
        """
        Validate the log output of GenericController.collapse_arakoon

        One or more clusters are created for every Arakoon cluster type on 2 StorageRouters.
        StorageRouter 2 is made unreachable, the collapse command on StorageRouter 1 is only mocked
        for the clusters flagged 'success', and externally managed clusters are expected to be skipped.
        """
        cluster_types = ServiceType.ARAKOON_CLUSTER_TYPES

        # Model 2 StorageRouters and start with an empty set of mocked SSH command results for both
        dal_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1, 2]})
        sr_1 = dal_structure['storagerouters'][1]
        sr_2 = dal_structure['storagerouters'][2]
        for router in (sr_1, sr_2):
            MockedSSHClient._run_returns[router.ip] = {}

        # Every Arakoon cluster type must be represented
        clusters_to_create = {
            cluster_types.SD: [{'name': 'unittest-voldrv',
                                'internal': True,
                                'success': True}],
            cluster_types.CFG: [{'name': 'unittest-cacc',
                                 'internal': True,
                                 'success': True}],
            cluster_types.FWK: [{'name': 'unittest-ovsdb',
                                 'internal': True,
                                 'success': False}],
            cluster_types.ABM: [{'name': 'unittest-cluster-1-abm',
                                 'internal': True,
                                 'success': False},
                                {'name': 'unittest-random-abm-name',
                                 'internal': False,
                                 'success': True}],
            cluster_types.NSM: [{'name': 'unittest-cluster-1-nsm_0',
                                 'internal': True,
                                 'success': True}]
        }
        self.assertEqual(
            first=sorted(clusters_to_create.keys()),
            second=sorted(ServiceType.ARAKOON_CLUSTER_TYPES.keys()),
            msg=
            'An Arakoon cluster type has been removed or added, please update this test accordingly'
        )

        # Create the clusters and their services, and sort the cluster names by expected outcome
        collapsed_ok = []
        collapsed_nok = []
        externally_managed = []
        for cluster_type, cluster_infos in clusters_to_create.iteritems():
            for cluster_info in cluster_infos:
                is_internal = cluster_info['internal']
                cluster_name = cluster_info['name']

                base_dir = DalHelper.CLUSTER_DIR.format(cluster_name)
                installer = ArakoonInstaller(cluster_name=cluster_name)
                installer.create_cluster(cluster_type=cluster_type,
                                         ip=sr_1.ip,
                                         base_dir=base_dir,
                                         internal=is_internal)
                installer.start_cluster()
                installer.extend_cluster(new_ip=sr_2.ip, base_dir=base_dir)

                service_name = ArakoonInstaller.get_service_name_for_cluster(
                    cluster_name=cluster_name)
                if cluster_type == cluster_types.ABM:
                    service_type_name = ServiceType.SERVICE_TYPES.ALBA_MGR
                elif cluster_type == cluster_types.NSM:
                    service_type_name = ServiceType.SERVICE_TYPES.NS_MGR
                else:
                    service_type_name = ServiceType.SERVICE_TYPES.ARAKOON
                service_type = ServiceTypeList.get_by_name(service_type_name)

                if is_internal is not True:
                    # External clusters get a single service which is not linked to a StorageRouter
                    DalHelper.create_service(service_name=service_name,
                                             service_type=service_type)
                    externally_managed.append(cluster_name)
                    continue

                for router in (sr_1, sr_2):
                    DalHelper.create_service(
                        service_name=service_name,
                        service_type=service_type,
                        storagerouter=router,
                        ports=installer.ports[router.ip])

                if cluster_info['success'] is not True:
                    # The collapse command is not mocked for these clusters, which makes the collapse fail
                    collapsed_nok.append(cluster_name)
                    continue

                if cluster_type == cluster_types.CFG:
                    config_path = ArakoonClusterConfig.CONFIG_FILE.format(
                        cluster_name)
                else:
                    config_path = Configuration.get_configuration_path(
                        ArakoonClusterConfig.CONFIG_KEY.format(cluster_name))
                sr_1_command = 'arakoon --collapse-local 1 2 -config {0}'.format(
                    config_path)
                sr_2_command = 'arakoon --collapse-local 2 2 -config {0}'.format(
                    config_path)
                MockedSSHClient._run_returns[sr_1.ip][sr_1_command] = None
                MockedSSHClient._run_returns[sr_2.ip][sr_2_command] = None
                collapsed_ok.append(cluster_name)

        # Collapse while StorageRouter 2 cannot be reached
        SSHClient._raise_exceptions[sr_2.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        GenericController.collapse_arakoon()

        # Check the logged messages (and their severity) for every cluster
        lib_logs = Logger._logs.get('lib', {})
        unreachable = (
            'ERROR',
            'Could not collapse any cluster on {0} (not reachable)'.format(
                sr_2.name))
        for cluster_name in collapsed_ok + collapsed_nok + externally_managed:
            collecting = (
                'DEBUG',
                'Collecting info for cluster {0}'.format(cluster_name))
            started = ('DEBUG', 'Collapsing cluster {0} on {1}'.format(
                cluster_name, sr_1.ip))
            completed = (
                'DEBUG', 'Collapsing cluster {0} on {1} completed'.format(
                    cluster_name, sr_1.ip))
            failed = ('ERROR', 'Collapsing cluster {0} on {1} failed'.format(
                cluster_name, sr_1.ip))

            if cluster_name in collapsed_ok:
                expect_logged = True
                expectations = [collecting, unreachable, started, completed]
            elif cluster_name in collapsed_nok:
                expect_logged = True
                expectations = [collecting, unreachable, started, failed]
            else:
                # Externally managed: none of these messages may show up
                expect_logged = False
                expectations = [collecting, started, completed]

            for severity, message in expectations:
                if expect_logged is False:
                    self.assertNotIn(
                        member=message,
                        container=lib_logs,
                        msg='Did not expect to find log message: {0}'.format(
                            message))
                    continue
                self.assertIn(
                    member=message,
                    container=lib_logs,
                    msg='Expected to find log message: {0}'.format(message))
                self.assertEqual(
                    first=severity,
                    second=lib_logs[message],
                    msg='Log message {0} is of severity {1} expected {2}'.
                    format(message, lib_logs[message], severity))

        # The start and finish messages are expected regardless of the individual cluster outcomes
        for general_message in ('Arakoon collapse started',
                                'Arakoon collapse finished'):
            self.assertIn(member=general_message,
                          container=lib_logs,
                          msg='Expected to find log message: {0}'.format(
                              general_message))
示例#3
0
    def _voldrv_arakoon_checkup(create_cluster):
        """
        Create (optionally) and extend the Arakoon cluster registered under the 'voldrv' key

        When create_cluster is True and no service is modelled yet for the configured cluster, an unused
        externally managed cluster of type SD is claimed; if none is available a new cluster named
        'voldrv' is created on a master StorageRouter which has a DB role partition.
        Afterwards, if the cluster metadata marks it as internal, the cluster is extended to every master
        StorageRouter with a DB role partition that is not part of the cluster yet.

        :param create_cluster: allow claiming/creating a cluster when no service is modelled yet
        :type create_cluster: bool
        :raises RuntimeError: when a cluster must be created but no StorageRouter with a DB role is found
        :return: None
        """
        def _add_service(service_storagerouter, arakoon_ports, service_name):
            """
            Model and save a new Service; its type is the `service_type` of the enclosing function

            :param service_storagerouter: StorageRouter to link the service to (None is passed for external clusters)
            :param arakoon_ports: ports to store on the service
            :param service_name: name to store on the service
            :return: the saved Service
            """
            new_service = Service()
            new_service.name = service_name
            # `service_type` is a closure variable, assigned further down before the first call
            new_service.type = service_type
            new_service.ports = arakoon_ports
            new_service.storagerouter = service_storagerouter
            new_service.save()
            return new_service

        # Collect the services (and the IPs of the internal ones) which belong to the configured cluster
        current_ips = []
        current_services = []
        service_type = ServiceTypeList.get_by_name(
            ServiceType.SERVICE_TYPES.ARAKOON)
        cluster_name = Configuration.get(
            '/ovs/framework/arakoon_clusters').get('voldrv')
        if cluster_name is not None:
            arakoon_service_name = ArakoonInstaller.get_service_name_for_cluster(
                cluster_name=cluster_name)
            for service in service_type.services:
                if service.name == arakoon_service_name:
                    current_services.append(service)
                    if service.is_internal is True:
                        current_ips.append(service.storagerouter.ip)

        # NOTE(review): all_sr_ips is filled here and below but never read in this function - confirm it can go
        all_sr_ips = [
            storagerouter.ip
            for storagerouter in StorageRouterList.get_slaves()
        ]
        # Map every master StorageRouter with at least 1 DB role partition to its first DB partition
        available_storagerouters = {}
        for storagerouter in StorageRouterList.get_masters():
            storagerouter.invalidate_dynamics(['partition_config'])
            if len(storagerouter.partition_config[DiskPartition.ROLES.DB]) > 0:
                available_storagerouters[storagerouter] = DiskPartition(
                    storagerouter.partition_config[DiskPartition.ROLES.DB][0])
            all_sr_ips.append(storagerouter.ip)

        if create_cluster is True and len(
                current_services) == 0:  # Create new cluster
            metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(
                cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD)
            if metadata is None:  # No externally managed cluster found, we create 1 ourselves
                if not available_storagerouters:
                    raise RuntimeError(
                        'Could not find any Storage Router with a DB role')

                # Take whichever entry the dict yields first
                storagerouter, partition = available_storagerouters.items()[0]
                arakoon_voldrv_cluster = 'voldrv'
                arakoon_installer = ArakoonInstaller(
                    cluster_name=arakoon_voldrv_cluster)
                arakoon_installer.create_cluster(
                    cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                    ip=storagerouter.ip,
                    base_dir=partition.folder,
                    log_sinks=LogHandler.get_sink_path(
                        'arakoon-server_{0}'.format(arakoon_voldrv_cluster)),
                    crash_log_sinks=LogHandler.get_sink_path(
                        'arakoon-server-crash_{0}'.format(
                            arakoon_voldrv_cluster)))
                arakoon_installer.start_cluster()
                ports = arakoon_installer.ports[storagerouter.ip]
                metadata = arakoon_installer.metadata
                current_ips.append(storagerouter.ip)
            else:
                # Claimed an externally managed cluster: the service is modelled without ports or StorageRouter
                ports = []
                storagerouter = None

            # Register the cluster name and model a service for the claimed/created cluster
            cluster_name = metadata['cluster_name']
            Configuration.set('/ovs/framework/arakoon_clusters|voldrv',
                              cluster_name)
            StorageDriverController._logger.info(
                'Claiming {0} managed arakoon cluster: {1}'.format(
                    'externally' if storagerouter is None else 'internally',
                    cluster_name))
            StorageDriverController._configure_arakoon_to_volumedriver(
                cluster_name=cluster_name)
            current_services.append(
                _add_service(
                    service_storagerouter=storagerouter,
                    arakoon_ports=ports,
                    service_name=ArakoonInstaller.get_service_name_for_cluster(
                        cluster_name=cluster_name)))

        # Re-read the configured name, it may have been set by the creation step above
        cluster_name = Configuration.get(
            '/ovs/framework/arakoon_clusters').get('voldrv')
        if cluster_name is None:
            return
        metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=cluster_name)
        # Only internal clusters with fewer services than eligible StorageRouters are extended
        if 0 < len(current_services) < len(
                available_storagerouters) and metadata['internal'] is True:
            for storagerouter, partition in available_storagerouters.iteritems(
            ):
                if storagerouter.ip in current_ips:
                    continue
                arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
                arakoon_installer.load()
                arakoon_installer.extend_cluster(
                    new_ip=storagerouter.ip,
                    base_dir=partition.folder,
                    log_sinks=LogHandler.get_sink_path(
                        'arakoon-server_{0}'.format(cluster_name)),
                    crash_log_sinks=LogHandler.get_sink_path(
                        'arakoon-server-crash_{0}'.format(cluster_name)))
                # The service is modelled before the cluster is restarted for the new node
                _add_service(
                    service_storagerouter=storagerouter,
                    arakoon_ports=arakoon_installer.ports[storagerouter.ip],
                    service_name=ArakoonInstaller.get_service_name_for_cluster(
                        cluster_name=cluster_name))
                current_ips.append(storagerouter.ip)
                arakoon_installer.restart_cluster_after_extending(
                    new_ip=storagerouter.ip)
            # Called once after the extension loop
            StorageDriverController._configure_arakoon_to_volumedriver(
                cluster_name=cluster_name)
    def test_node_config_checkup(self):
        """
        Validate StorageDriverController.cluster_registry_checkup

        Covers 3 consecutive runs: the initial configuration, a second run which must not change
        anything and a run after a Domain change while the client of node 1 raises an exception.
        """
        # Expected node configs, the 'node_distance_map' is filled in per validation
        base_structure = {
            '1': {'vrouter_id': '1',
                  'message_host': '10.0.1.1',
                  'message_port': 1,
                  'xmlrpc_host': '10.0.0.1',
                  'xmlrpc_port': 2,
                  'failovercache_host': '10.0.1.1',
                  'failovercache_port': 3,
                  'network_server_uri': 'tcp://10.0.1.1:4',
                  'node_distance_map': None},
            '2': {'vrouter_id': '2',
                  'message_host': '10.0.1.2',
                  'message_port': 1,
                  'xmlrpc_host': '10.0.0.2',
                  'xmlrpc_port': 2,
                  'failovercache_host': '10.0.1.2',
                  'failovercache_port': 3,
                  'network_server_uri': 'tcp://10.0.1.2:4',
                  'node_distance_map': None}
        }

        def _validate_node_config(_config, _expected_map):
            """ Compare a node config from the cluster registry with the expected values """
            expected = copy.deepcopy(base_structure[_config.vrouter_id])
            expected['node_distance_map'] = _expected_map[_config.vrouter_id]
            actual = {'vrouter_id': _config.vrouter_id,
                      'message_host': _config.message_host,
                      'message_port': _config.message_port,
                      'xmlrpc_host': _config.xmlrpc_host,
                      'xmlrpc_port': _config.xmlrpc_port,
                      'failovercache_host': _config.failovercache_host,
                      'failovercache_port': _config.failovercache_port,
                      'network_server_uri': _config.network_server_uri,
                      'node_distance_map': _config.node_distance_map}
            self.assertDictEqual(expected, actual)

        def _checkup_and_validate(expected_changes, expected_recordings, distance):
            """ Run the checkup with cleared recordings and verify result, recordings and node configs """
            StorageRouterClient.node_config_recordings = []
            outcome = StorageDriverController.cluster_registry_checkup()
            self.assertDictEqual(outcome,
                                 {vpool.guid: {'success': True,
                                               'changes': expected_changes}})
            self.assertListEqual(
                sorted(StorageRouterClient.node_config_recordings),
                expected_recordings)
            distance_map = {'1': {'2': distance},
                            '2': {'1': distance}}
            for node_config in vpool.clusterregistry_client.get_node_configs():
                _validate_node_config(node_config, distance_map)

        # storagedrivers: (<id>, <vpool_id>, <storagerouter_id>)
        # storagerouter_domains: (<id>, <storagerouter_id>, <domain_id>, <backup>)
        structure = DalHelper.build_dal_structure({
            'vpools': [1],
            'domains': [1, 2],
            'storagerouters': [1, 2],
            'storagedrivers': [(1, 1, 1), (2, 1, 2)],
            'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]
        })
        storagerouters = structure['storagerouters']
        vpool = structure['vpools'][1]
        installer = ArakoonInstaller(cluster_name='voldrv')
        installer.create_cluster(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
            ip=storagerouters[1].ip,
            base_dir='/tmp')

        # Initial run, it will now be configured
        _checkup_and_validate(expected_changes=True,
                              expected_recordings=['1', '2'],
                              distance=StorageDriver.DISTANCES.NEAR)

        # Running it again should not change anything
        _checkup_and_validate(expected_changes=False,
                              expected_recordings=[],
                              distance=StorageDriver.DISTANCES.NEAR)

        # Validate some error paths: move StorageRouter 1 to Domain 2 and let the client for node 1 raise
        other_domain = structure['domains'][2]
        junction = storagerouters[1].domains[0]
        junction.domain = other_domain
        junction.save()
        vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(
            vpool.guid)
        StorageRouterClient.exceptions['server_revision'] = {
            vpool_config_path: Exception('ClusterNotReachableException')
        }
        _checkup_and_validate(expected_changes=True,
                              expected_recordings=['2'],
                              distance=StorageDriver.DISTANCES.INFINITE)
示例#5
0
    def test_alba_arakoon_checkup(self):
        """
        Validates whether the ALBA Arakoon checkup works (Manual and Scheduled)

        Scheduled part: an ABM and NSM cluster are created for 1 ALBA Backend, the scheduled checkup is
        executed with 1 and with 3 StorageRouters, and once more with an unreachable StorageRouter.
        Manual part: the manual checkup is executed without clusters, with invalid combinations of
        clusters (each expected to raise a ValueError) and finally with unclaimed external clusters.
        """
        def _list_arakoon_clusters():
            """ Sorted names currently registered under '/ovs/arakoon', re-read on every call """
            return sorted(Configuration.list('/ovs/arakoon'))

        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})

        #############################
        # SCHEDULED_ARAKOON_CHECKUP #
        #############################
        # Create an ABM and NSM cluster for ALBA Backend 1 and do some basic validations
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        abm_cluster_name = '{0}-abm'.format(ab_1.name)
        nsm_cluster_name = '{0}-nsm_0'.format(ab_1.name)
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=_list_arakoon_clusters())

        abm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=abm_cluster_name)
        nsm_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=nsm_cluster_name)
        self.assertTrue(expr=abm_metadata['in_use'])
        self.assertTrue(expr=nsm_metadata['in_use'])

        # Run scheduled Arakoon checkup and validate amount of Arakoon clusters did not change
        # The clusters are listed again, comparing against the listing made before the checkup proves nothing
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=_list_arakoon_clusters())
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Create 2 additional StorageRouters
        srs = DalHelper.build_dal_structure(
            structure={'storagerouters': [2, 3]},
            previous_structure=ovs_structure)['storagerouters']
        sr_2 = srs[2]
        sr_3 = srs[3]

        # Run scheduled checkup again and do some validations
        MockedSSHClient._run_returns[sr_2.ip] = {}
        MockedSSHClient._run_returns[sr_3.ip] = {}
        MockedSSHClient._run_returns[sr_2.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_2/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_3/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_2.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_2.machine_id)] = None
        MockedSSHClient._run_returns[sr_3.ip][
            'arakoon --node {0} -config file://opt/OpenvStorage/config/framework.json?key=/ovs/arakoon/backend_1-abm/config -catchup-only'
            .format(sr_3.machine_id)] = None
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=_list_arakoon_clusters())
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services),
                         second=3)  # Gone up from 1 to 3
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)  # Still 1 since NSM checkup hasn't ran yet

        # Make sure 1 StorageRouter is unreachable
        SSHClient._raise_exceptions[sr_3.ip] = {
            'users': ['ovs'],
            'exception': UnableToConnectException('No route to host')
        }
        AlbaArakoonController.scheduled_alba_arakoon_checkup()
        alba_logs = Logger._logs.get('lib', [])
        self.assertIn(
            member='Storage Router with IP {0} is not reachable'.format(
                sr_3.ip),
            container=alba_logs)

        ##########################
        # MANUAL_ARAKOON_CHECKUP #
        ##########################
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]
        MockedSSHClient._run_returns[sr_1.ip] = {}
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/albamgr_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-abm/db'] = None
        MockedSSHClient._run_returns[sr_1.ip][
            'ln -s /usr/lib/alba/nsm_host_plugin.cmxs /tmp/unittest/sr_1/disk_1/partition_1/arakoon/backend_1-nsm_0/db'] = None
        AlbaController.add_cluster(ab_1.guid)

        # Run manual Arakoon checkup and validate amount of Arakoon clusters did not change
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid, nsm_clusters=[], abm_cluster=None)
        self.assertListEqual(list1=[abm_cluster_name, nsm_cluster_name],
                             list2=_list_arakoon_clusters())
        self.assertEqual(first=len(ab_1.abm_cluster.abm_services), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters), second=1)
        self.assertEqual(first=len(ab_1.nsm_clusters[0].nsm_services),
                         second=1)

        # Test some error paths
        # NSM clusters without an ABM cluster
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['no_abm_cluster_passed'])
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        # ABM cluster without NSM clusters
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[],
                abm_cluster='no_nsm_clusters_passed')
        self.assertEqual(
            first=raise_info.exception.message,
            second='Both ABM cluster and NSM clusters must be provided')
        # Clusters which are already in use
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=[nsm_cluster_name],
                abm_cluster=abm_cluster_name)
        self.assertEqual(first=raise_info.exception.message,
                         second='Cluster {0} has already been claimed'.format(
                             abm_cluster_name))
        # Clusters which do not exist
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.manual_alba_arakoon_checkup(
                alba_backend_guid=ab_1.guid,
                nsm_clusters=['non-existing-nsm-cluster'],
                abm_cluster='non-existing-abm-cluster')
        self.assertEqual(
            first=raise_info.exception.message,
            second=
            'Could not find an Arakoon cluster with name: non-existing-abm-cluster'
        )

        # Recreate ALBA Backend with Arakoon clusters
        AlbaDalHelper.setup()  # Clear everything
        ovs_structure = DalHelper.build_dal_structure(
            structure={'storagerouters': [1]})
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_backends': [[1, 'LOCAL']]})
        sr_1 = ovs_structure['storagerouters'][1]
        ab_1 = alba_structure['alba_backends'][1]

        # Create some Arakoon clusters to be claimed by the manual checkup
        for cluster_name, cluster_type in {
                'manual-abm-1': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-abm-2': ServiceType.ARAKOON_CLUSTER_TYPES.ABM,
                'manual-nsm-1': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-2': ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                'manual-nsm-3': ServiceType.ARAKOON_CLUSTER_TYPES.NSM
        }.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(
                cluster_type=cluster_type,
                ip=sr_1.ip,
                base_dir=DalHelper.CLUSTER_DIR.format(cluster_name),
                internal=False)
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
        AlbaArakoonController.manual_alba_arakoon_checkup(
            alba_backend_guid=ab_1.guid,
            nsm_clusters=['manual-nsm-1', 'manual-nsm-3'],
            abm_cluster='manual-abm-2')

        # Validate the correct clusters have been claimed by the manual checkup
        unused_abms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.ABM)
        unused_nsms = ArakoonInstaller.get_unused_arakoon_clusters(
            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM)
        self.assertEqual(first=len(unused_abms), second=1)
        self.assertEqual(first=len(unused_nsms), second=1)
        self.assertEqual(first=unused_abms[0]['cluster_name'],
                         second='manual-abm-1')
        self.assertEqual(first=unused_nsms[0]['cluster_name'],
                         second='manual-nsm-2')
    def ensure_nsm_clusters_load(cls,
                                 alba_backend,
                                 nsms_per_storagerouter=None,
                                 min_internal_nsms=1,
                                 external_nsm_cluster_names=None,
                                 version_str=None,
                                 ssh_clients=None):
        # type: (AlbaBackend, Optional[Dict[StorageRouter, int]], Optional[int], Optional[List[str]], Optional[str], Optional[Dict[StorageRouter, SSHClient]]) -> None
        """
        Ensure that the NSM clusters of an ALBA Backend are not overloaded

        The ALBA Backend is considered overloaded when the load of every NSM cluster has reached the configured 'nsm.maxload'
            * Internally managed clusters: New NSM clusters are deployed on the 'nsm.safety' least occupied StorageRouters
            * Externally managed clusters: The clusters listed in 'external_nsm_cluster_names' are claimed and registered
        :param alba_backend: Alba Backend to ensure NSM Cluster load for
        :type alba_backend: AlbaBackend
        :param nsms_per_storagerouter: Amount of NSMs mapped by StorageRouter. Retrieved when not provided.
                                       The counters in this dict are incremented for every StorageRouter a new NSM gets deployed on
        :type nsms_per_storagerouter: Dict[StorageRouter, int]
        :param min_internal_nsms: Minimum amount of NSM clusters the ALBA Backend should have (only used for internally managed Arakoon clusters)
        :type min_internal_nsms: int
        :param external_nsm_cluster_names: Information about the additional clusters to claim (only for externally managed Arakoon clusters)
        :type external_nsm_cluster_names: list
        :param version_str: Alba version string. Retrieved when not provided
        :type version_str: str
        :param ssh_clients: SSHClients to use, mapped by StorageRouter
        :type ssh_clients: Dict[Storagerouter, SSHClient]
        :raises ValueError: When clusters are externally managed and no online master node could be found
        :return: None
        :rtype: NoneType
        """
        if ssh_clients is None:
            ssh_clients = {}
        if external_nsm_cluster_names is None:
            external_nsm_cluster_names = []
        if nsms_per_storagerouter is None:
            nsms_per_storagerouter = cls.get_nsms_per_storagerouter(
                alba_backend)
        version_str = version_str or AlbaArakoonInstaller.get_alba_version_string(
        )
        nsm_loads = cls.get_nsm_loads(alba_backend)
        internal = AlbaArakoonInstaller.is_internally_managed(alba_backend)
        abm_cluster_name = alba_backend.abm_cluster.name

        safety = Configuration.get(
            '/ovs/framework/plugins/alba/config|nsm.safety')
        maxload = Configuration.get(
            '/ovs/framework/plugins/alba/config|nsm.maxload')

        # Only when even the least loaded NSM reached the maximum load, the Backend is overloaded
        overloaded = min(nsm_loads.values()) >= maxload
        if not overloaded:
            # At least 1 NSM is not overloaded yet
            cls._logger.debug(
                'ALBA Backend {0} - NSM load OK'.format(alba_backend.name))
            if internal:
                # Load is OK, so only deploy what is missing to reach the requested minimum
                nsms_to_add = max(0, min_internal_nsms - len(nsm_loads))
            else:
                nsms_to_add = len(external_nsm_cluster_names)
            if nsms_to_add == 0:
                return
        else:
            cls._logger.warning(
                'ALBA Backend {0} - NSM load is NOT OK'.format(
                    alba_backend.name))
            if internal:
                # When load is not OK, deploy at least 1 additional NSM
                nsms_to_add = max(1, min_internal_nsms - len(nsm_loads))
            else:
                # For externally managed clusters we only claim the specified clusters, if none provided, we just log it
                nsms_to_add = len(external_nsm_cluster_names)
                if nsms_to_add == 0:
                    cls._logger.critical(
                        'ALBA Backend {0} - All NSM clusters are overloaded'.
                        format(alba_backend.name))
                    return

        # Deploy new (internal) or claim existing (external) NSM clusters
        cls._logger.debug(
            'ALBA Backend {0} - Currently {1} NSM cluster{2}'.format(
                alba_backend.name, len(nsm_loads),
                '' if len(nsm_loads) == 1 else 's'))
        cls._logger.debug(
            'ALBA Backend {0} - Trying to add {1} NSM cluster{2}'.format(
                alba_backend.name, nsms_to_add,
                '' if nsms_to_add == 1 else 's'))
        base_number = max(nsm_loads.keys()) + 1

        master_client = None
        if not internal:
            # Registering an external NSM requires a client towards a master node.
            # The lookup does not depend on the cluster being claimed, so it is done once, up front
            if ssh_clients:
                for storagerouter, ssh_client in ssh_clients.iteritems():
                    if storagerouter.node_type == 'MASTER':
                        master_client = ssh_client
                        break
            else:
                for storagerouter in StorageRouterList.get_masters():
                    try:
                        master_client = SSHClient(storagerouter)
                        break
                    except UnableToConnectException:
                        cls._logger.warning(
                            'StorageRouter {0} with IP {1} is not reachable'
                            .format(storagerouter.name, storagerouter.ip))
            if not master_client:
                raise ValueError('Could not find an online master node')

        for index, number in enumerate(
                xrange(base_number, base_number + nsms_to_add)):
            if not internal:
                # External clusters
                # 'nsms_to_add' equals the amount of provided names here, so 'index' is always within bounds
                nsm_cluster_name = external_nsm_cluster_names[index]
                cls._logger.debug(
                    'ALBA Backend {0} - Claiming NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))
                metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(
                    cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                    cluster_name=nsm_cluster_name)
                if metadata is None:
                    cls._logger.critical(
                        'ALBA Backend {0} - NSM cluster with name {1} could not be found'
                        .format(alba_backend.name, nsm_cluster_name))
                    continue

                cls._logger.debug(
                    'ALBA Backend {0} - Modeling services'.format(
                        alba_backend.name))
                AlbaArakoonInstaller.model_arakoon_service(
                    alba_backend=alba_backend,
                    cluster_name=nsm_cluster_name,
                    number=number)
                cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(
                    alba_backend.name))
                NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                          nsm_name=nsm_cluster_name,
                                          ip=master_client.ip)
                cls._logger.debug(
                    'ALBA Backend {0} - Extended cluster'.format(
                        alba_backend.name))
            else:
                # Internal clusters
                nsm_cluster_name = '{0}-nsm_{1}'.format(
                    alba_backend.name, number)
                cls._logger.debug(
                    'ALBA Backend {0} - Adding NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))

                # One of the NSM nodes is overloaded. This means the complete NSM is considered overloaded
                # Figure out which StorageRouters are the least occupied
                # The sort is stable and only compares the loads, so equally loaded StorageRouters keep their
                # iteration order while a StorageRouter with a lower load is never skipped for one with a higher load
                storagerouters = sorted(
                    nsms_per_storagerouter,
                    key=nsms_per_storagerouter.get)[:safety]
                # Creating a new NSM cluster
                for sub_index, storagerouter in enumerate(storagerouters):
                    # Keep the bookkeeping up-to-date for the next cluster to deploy
                    nsms_per_storagerouter[storagerouter] += 1
                    partition = AlbaArakoonInstaller.get_db_partition(
                        storagerouter)
                    arakoon_installer = ArakoonInstaller(
                        cluster_name=nsm_cluster_name)
                    # @todo Use deploy and extend code. (Disable register nsm in those parts)
                    if sub_index == 0:
                        # The first StorageRouter creates the cluster, the others extend it
                        arakoon_installer.create_cluster(
                            cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.NSM,
                            ip=storagerouter.ip,
                            base_dir=partition.folder,
                            plugins={NSM_PLUGIN: version_str})
                    else:
                        cls._logger.debug(
                            'ALBA Backend {0} - Extending NSM cluster {1}'.
                            format(alba_backend.name, nsm_cluster_name))
                        arakoon_installer.load()
                        arakoon_installer.extend_cluster(
                            new_ip=storagerouter.ip,
                            base_dir=partition.folder,
                            plugins={NSM_PLUGIN: version_str})
                    cls._logger.debug(
                        'ALBA Backend {0} - Linking plugins'.format(
                            alba_backend.name))
                    # Re-use a provided client, otherwise connect to the StorageRouter being handled
                    ssh_client = ssh_clients.get(storagerouter) or SSHClient(
                        storagerouter)
                    AlbaArakoonInstaller.link_plugins(
                        client=ssh_client,
                        data_dir=partition.folder,
                        plugins=[NSM_PLUGIN],
                        cluster_name=nsm_cluster_name)
                    cls._logger.debug(
                        'ALBA Backend {0} - Modeling services'.format(
                            alba_backend.name))
                    AlbaArakoonInstaller.model_arakoon_service(
                        alba_backend=alba_backend,
                        cluster_name=nsm_cluster_name,
                        ports=arakoon_installer.ports[storagerouter.ip],
                        storagerouter=storagerouter,
                        number=number)
                    if sub_index == 0:
                        cls._logger.debug(
                            'ALBA Backend {0} - Starting cluster'.format(
                                alba_backend.name))
                        arakoon_installer.start_cluster()
                    else:
                        cls._logger.debug(
                            'ALBA Backend {0} - Restarting cluster'.format(
                                alba_backend.name))
                        arakoon_installer.restart_cluster_after_extending(
                            new_ip=storagerouter.ip)
                cls._logger.debug('ALBA Backend {0} - Registering NSM'.format(
                    alba_backend.name))
                NSMInstaller.register_nsm(abm_name=abm_cluster_name,
                                          nsm_name=nsm_cluster_name,
                                          ip=storagerouters[0].ip)
                cls._logger.debug(
                    'ALBA Backend {0} - Added NSM cluster {1}'.format(
                        alba_backend.name, nsm_cluster_name))
示例#7
0
    def test_nsm_checkup_external(self):
        """
        Validates whether the NSM checkup works for externally managed Arakoon clusters
        Scenario covered by this test:
            * Argument validation of 'nsm_checkup' when external NSM cluster names are passed
            * Claiming an external ABM and NSM cluster through 'add_cluster'
            * NSM checkup on a healthy and on an overloaded Backend without additional clusters to claim
            * The maximum of 50 NSM clusters per ALBA Backend
            * Claiming an already claimed cluster and claiming a 2nd, still unclaimed, NSM cluster
        """
        # With a maxload of 10, the namespaces_count of 25 set further down overloads the NSM
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.safety', 1)
        Configuration.set('/ovs/framework/plugins/alba/config|nsm.maxload', 10)

        structure = DalHelper.build_dal_structure(structure={'storagerouters': [1, 2, 3]})
        alba_structure = AlbaDalHelper.build_dal_structure(structure={'alba_backends': [[1, 'LOCAL']]})

        alba_backend = alba_structure['alba_backends'][1]
        storagerouter_1 = structure['storagerouters'][1]
        storagerouter_2 = structure['storagerouters'][2]

        # Validate some logic for externally managed arakoons during NSM checkup
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(external_nsm_cluster_names=['test'])  # No ALBA Backend specified
        self.assertEqual(first=str(raise_info.exception), second='Additional NSMs can only be configured for a specific ALBA Backend')
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, min_internal_nsms=2, external_nsm_cluster_names=['test'])
        self.assertEqual(first=str(raise_info.exception), second="'min_internal_nsms' and 'external_nsm_cluster_names' are mutually exclusive")
        with self.assertRaises(ValueError) as raise_info:
            # noinspection PyTypeChecker
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names={})  # NSM cluster names must be a list
        self.assertEqual(first=str(raise_info.exception), second="'external_nsm_cluster_names' must be of type 'list'")
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=['non-existing-cluster'])  # non-existing cluster names should raise
        self.assertEqual(first=str(raise_info.exception), second="Arakoon cluster with name non-existing-cluster does not exist")

        # Create an external ABM and NSM Arakoon cluster
        external_abm_1 = 'backend_1-abm'
        external_nsm_1 = 'backend_1-nsm_0'
        external_nsm_2 = 'backend_1-nsm_1'
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            # Every cluster is created on storagerouter_1, extended to storagerouter_2 and unclaimed again,
            # which leaves it available (in_use: False) for the claims made further down
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            arakoon_installer.create_cluster(cluster_type=cluster_type, ip=storagerouter_1.ip, base_dir='/tmp', internal=False)
            arakoon_installer.extend_cluster(new_ip=storagerouter_2.ip, base_dir='/tmp')
            arakoon_installer.start_cluster()
            arakoon_installer.unclaim_cluster()
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))

        # Let the 'add_cluster' claim the externally managed clusters and model the services
        Logger._logs = {}  # Reset the captured logs, so only records of the call below are inspected
        AlbaController.add_cluster(alba_backend_guid=alba_backend.guid,
                                   abm_cluster=external_abm_1,
                                   nsm_clusters=[external_nsm_1])  # Only claim external_nsm_1
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': False if cluster_name == external_nsm_2 else True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'NSM load OK' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        # The services of the claimed external clusters are expected to be modeled without a StorageRouter
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(VirtualAlbaBackend.run_log['backend_1-abm'], [['update_abm_client_config'],
                                                                           ['add_nsm_host', 'backend_1-nsm_0'],
                                                                           ['update_maintenance_config','--eviction-type-random'],
                                                                           ['update_maintenance_config','enable-auto-cleanup-deleted-namespaces-days']])

        # Add cluster already invokes a NSM checkup, so nothing should have changed
        VirtualAlbaBackend.run_log['backend_1-abm'] = []
        AlbaArakoonController.nsm_checkup()
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Overload the only NSM and run NSM checkup. This should log a critical message, but change nothing
        VirtualAlbaBackend.data['backend_1-abm']['nsms'][0]['namespaces_count'] = 25
        Logger._logs = {}
        AlbaArakoonController.nsm_checkup()
        log_found = False
        for log_record in Logger._logs.get('lib', []):
            if 'All NSM clusters are overloaded' in log_record:
                log_found = True
                break
        self.assertTrue(expr=log_found)
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[], list2=VirtualAlbaBackend.run_log['backend_1-abm'])

        # Validate a maximum of 50 NSMs can be deployed
        # Remember the NSM clusters modeled so far, so only the ones added below get removed afterwards
        current_nsms = [nsm_cluster.number for nsm_cluster in alba_backend.nsm_clusters]
        alba_structure = AlbaDalHelper.build_dal_structure(
            structure={'alba_nsm_clusters': [(1, 50)]},  # (<alba_backend_id>, <amount_of_nsm_clusters>)
            previous_structure=alba_structure
        )
        # Try to add 1 additional NSM
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=str(raise_info.exception), second='The maximum of 50 NSM Arakoon clusters will be exceeded. Amount of clusters that can be deployed for this ALBA Backend: 0')

        # Remove the unused NSM clusters again
        for nsm_cluster in alba_structure['alba_nsm_clusters'][1][len(current_nsms):]:
            for nsm_service in nsm_cluster.nsm_services:
                nsm_service.delete()
                nsm_service.service.delete()
            nsm_cluster.delete()

        # Try to add a previously claimed NSM cluster
        with self.assertRaises(ValueError) as raise_info:
            AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_1])  # The provided cluster_name to claim has already been claimed
        self.assertEqual(first=str(raise_info.exception), second='Some of the provided cluster_names have already been claimed before')

        # Add a 2nd NSM cluster
        AlbaArakoonController.nsm_checkup(alba_backend_guid=alba_backend.guid, external_nsm_cluster_names=[external_nsm_2])
        self.assertEqual(first=1, second=len(alba_backend.abm_cluster.abm_services))
        self.assertEqual(first=2, second=len(alba_backend.nsm_clusters))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[0].nsm_services))
        self.assertEqual(first=1, second=len(alba_backend.nsm_clusters[1].nsm_services))
        self.assertIsNone(obj=alba_backend.abm_cluster.abm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[0].nsm_services[0].service.storagerouter)
        self.assertIsNone(obj=alba_backend.nsm_clusters[1].nsm_services[0].service.storagerouter)
        self.assertListEqual(list1=[['add_nsm_host', 'backend_1-nsm_1']], list2=VirtualAlbaBackend.run_log['backend_1-abm'])
        # By now all 3 external clusters must be marked as in use
        for cluster_name, cluster_type in {external_abm_1: 'ABM', external_nsm_1: 'NSM', external_nsm_2: 'NSM'}.iteritems():
            arakoon_installer = ArakoonInstaller(cluster_name=cluster_name)
            self.assertDictEqual(d1={'cluster_name': cluster_name,
                                     'cluster_type': cluster_type,
                                     'internal': False,
                                     'in_use': True},
                                 d2=arakoon_installer.get_arakoon_metadata_by_cluster_name(cluster_name=cluster_name))