示例#1
0
 def test_backoff_gap(self):
     """
     Validates the backoff gap settings generated for different sizes (to be passed into the StorageDriver)
     Every scenario maps a size onto the 'backoff' and 'trigger' values which
     StorageDriverController.generate_backoff_gap_settings is expected to return for that size
     """
     scenarios = {
         1 * 1024**3: {
             'backoff': int(1 * 1024**3 * 0.1),
             'trigger': int(1 * 1024**3 * 0.08)
         },
         2 * 1024**4: {
             # Upper limits based on 500GiB volume
             'backoff': int(500 * 1024**3 * 0.1),
             'trigger': int(500 * 1024**3 * 0.08)
         },
         5: {
             'backoff': 2,
             'trigger': 1
         },
         None: {
             # Invalid size, return default
             'backoff': 2 * 1024**3,
             'trigger': 1 * 1024**3
         }
     }
     # dict.items() exists on both Python 2 and Python 3, dict.iteritems() only on Python 2
     for size, gap_config in scenarios.items():
         self.assertDictEqual(
             StorageDriverController.generate_backoff_gap_settings(size),
             gap_config)
示例#2
0
 def mark_storagerouter_reachable_for_ha(cls, storagerouter):
     # type: (StorageRouter) -> None
     """
     Put a StorageRouter back into the HA pool
     The StorageRouter's entry below VPOOL_UPDATE_KEY is deleted from the configuration, after which a
     cluster registry checkup is executed and the call sleeps for the edge sync time
     :param storagerouter: Storagerouter to put back into the distance map
     :type storagerouter: StorageRouter
     :return: None
     """
     start_message = "Marking Storagerouter {} as available for HA".format(storagerouter.name)
     cls.logger.info(start_message)
     update_key = os.path.join(VPOOL_UPDATE_KEY, storagerouter.guid)
     Configuration.delete(update_key)
     # The checkup triggers a complete reload of the node distance maps
     StorageDriverController.cluster_registry_checkup()
     # Give the edge some time to catch up with all the configs
     edge_sync_delay = cls.get_edge_sync_time()
     wait_message = "Waiting {} to sync up all edge clients".format(edge_sync_delay)
     cls.logger.info(wait_message)
     time.sleep(edge_sync_delay)
示例#3
0
 def mark_storagerouter_unreachable_for_ha(cls, storagerouter):
     """
     Mark a StorageRouter as unavailable for HA
     The value 0 is stored under the StorageRouter's entry below VPOOL_UPDATE_KEY, after which a
     cluster registry checkup is executed and the call sleeps for the edge sync time
     Current code paths that update the node distance map on the volumedriver side are:
     - Update of domains
     - Update of vpool layout (extend/shrink)
     - cluster registry checkup (ran periodically)
     :param storagerouter: Storagerouter to mark as unavailable for HA
     :type storagerouter: StorageRouter
     :return: None
     :rtype: NoneType
     """
     start_message = "Marking Storagerouter {} as unavailable for HA".format(storagerouter.name)
     cls.logger.info(start_message)
     # This key is the value used in the storagedriver cluster node config path
     # It applies to all code paths listed in the docstring
     update_key = os.path.join(VPOOL_UPDATE_KEY, storagerouter.guid)
     Configuration.set(update_key, 0)
     # The checkup triggers a complete reload of the node distance maps
     StorageDriverController.cluster_registry_checkup()
     # Give the edge some time to catch up with all the configs
     edge_sync_delay = cls.get_edge_sync_time()
     wait_message = "Waiting {} to sync up all edge clients".format(edge_sync_delay)
     cls.logger.info(wait_message)
     time.sleep(edge_sync_delay)
示例#4
0
    def build_dal_structure(structure, previous_structure=None):
        """
        Builds a model structure
        Objects whose id is already present in previous_structure are reused, every other requested object
        is created and saved. The edition key and the per-vPool / per-host configuration are (re)written as well
        Example:
            structure = DalHelper.build_dal_structure(
                {'vpools': [1],
                 'domains': [],
                 'storagerouters': [1],
                 'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
                 'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
                 'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id or None>)
                 'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
            )
        :param structure: Description of the objects to model, keyed by object type (see example)
        :type structure: dict
        :param previous_structure: Result of an earlier call; its containers are extended in place
        :type previous_structure: dict
        :return: The modeled objects, grouped per object type and keyed by the ids used in structure
                 (services are keyed by their generated name)
        :rtype: dict
        """
        Configuration.set(key=Configuration.EDITION_KEY,
                          value=PackageFactory.EDITION_ENTERPRISE)

        # Start from the objects of a previous run (if any), so only missing objects get created
        if previous_structure is None:
            previous_structure = {}
        vdisks = previous_structure.get('vdisks', {})
        vpools = previous_structure.get('vpools', {})
        domains = previous_structure.get('domains', {})
        services = previous_structure.get('services', {})
        mds_services = previous_structure.get('mds_services', {})
        storagerouters = previous_structure.get('storagerouters', {})
        storagedrivers = previous_structure.get('storagedrivers', {})
        storagerouter_domains = previous_structure.get('storagerouter_domains',
                                                       {})

        # Make sure every known service type is modeled
        service_types = {}
        for service_type_name in ServiceType.SERVICE_TYPES.values():
            service_type = ServiceTypeList.get_by_name(service_type_name)
            if service_type is None:
                service_type = ServiceType()
                service_type.name = service_type_name
                service_type.save()
            service_types[service_type_name] = service_type
        srclients = {}
        # Domains
        for domain_id in structure.get('domains', []):
            if domain_id not in domains:
                domain = Domain()
                domain.name = 'domain_{0}'.format(domain_id)
                domain.save()
                domains[domain_id] = domain
        # vPools: a StorageRouterClient and the MDS / scrub configuration are set for new and reused vPools alike
        for vpool_id in structure.get('vpools', []):
            if vpool_id not in vpools:
                vpool = VPool()
                vpool.name = str(vpool_id)
                vpool.status = 'RUNNING'
                vpool.metadata = {'backend': {}, 'caching_info': {}}
                vpool.metadata_store_bits = 5
                vpool.save()
                vpools[vpool_id] = vpool
            else:
                vpool = vpools[vpool_id]
            srclients[vpool_id] = StorageRouterClient(vpool.guid, None)
            Configuration.set(
                '/ovs/vpools/{0}/mds_config|mds_tlogs'.format(vpool.guid), 100)
            Configuration.set(
                '/ovs/vpools/{0}/mds_config|mds_safety'.format(vpool.guid), 2)
            Configuration.set(
                '/ovs/vpools/{0}/mds_config|mds_maxload'.format(vpool.guid),
                75)
            Configuration.set(
                '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(
                    vpool.guid),
                json.dumps({}, indent=4),
                raw=True)
        # StorageRouters: every new StorageRouter gets 1 disk with 1 partition holding the DB and SCRUB roles
        for sr_id in structure.get('storagerouters', []):
            if sr_id not in storagerouters:
                storagerouter = StorageRouter()
                storagerouter.name = str(sr_id)
                storagerouter.ip = '10.0.0.{0}'.format(sr_id)
                storagerouter.rdma_capable = False
                storagerouter.node_type = 'MASTER'
                storagerouter.machine_id = str(sr_id)
                storagerouter.save()
                storagerouters[sr_id] = storagerouter
                disk = Disk()
                disk.storagerouter = storagerouter
                disk.state = 'OK'
                disk.name = '/dev/uda'
                disk.size = 1 * 1024**4
                disk.is_ssd = True
                disk.aliases = ['/dev/uda']
                disk.save()
                partition = DiskPartition()
                partition.offset = 0
                partition.size = disk.size
                partition.aliases = ['/dev/uda-1']
                partition.state = 'OK'
                partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(
                    sr_id)
                partition.disk = disk
                partition.roles = [
                    DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB
                ]
                partition.save()
            else:
                storagerouter = storagerouters[sr_id]

            # Executed for new and reused StorageRouters: register the machine id for the StorageRouter's IP
            # noinspection PyProtectedMember
            System._machine_id[storagerouter.ip] = str(sr_id)
            # Port numbers are derived from the StorageRouter id in steps of 100, so StorageRouters do not overlap
            mds_start = 10000 + 100 * (sr_id - 1)
            mds_end = 10000 + 100 * sr_id - 1
            arakoon_start = 20000 + 100 * (sr_id - 1)
            storagedriver_start = 30000 + 100 * (sr_id - 1)
            storagedriver_end = 30000 + 100 * sr_id - 1
            Configuration.initialize_host(
                host_id=sr_id,
                port_info={
                    'mds': [mds_start, mds_end],
                    'arakoon': arakoon_start,
                    'storagedriver': [storagedriver_start, storagedriver_end]
                })

        # StorageDrivers: require the referenced vPool and StorageRouter to be modeled already
        for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
            if sd_id not in storagedrivers:
                storagedriver = StorageDriver()
                storagedriver.vpool = vpools[vpool_id]
                storagedriver.storagerouter = storagerouters[sr_id]
                storagedriver.name = str(sd_id)
                storagedriver.mountpoint = '/'
                storagedriver.cluster_ip = storagerouters[sr_id].ip
                storagedriver.storage_ip = '10.0.1.{0}'.format(sr_id)
                storagedriver.storagedriver_id = str(sd_id)
                storagedriver.ports = {
                    'management': 1,
                    'xmlrpc': 2,
                    'dtl': 3,
                    'edge': 4
                }
                storagedriver.save()
                storagedrivers[sd_id] = storagedriver
                DalHelper.set_vpool_storage_driver_configuration(
                    vpool=vpools[vpool_id], storagedriver=storagedriver)
        # MDS services: a Service, its MDSService and a StorageDriver partition on the first partition of the first disk
        for mds_id, sd_id in structure.get('mds_services', ()):
            if mds_id not in mds_services:
                sd = storagedrivers[sd_id]
                s_id = '{0}-{1}'.format(sd.storagerouter.name, mds_id)
                service = Service()
                service.name = s_id
                service.storagerouter = sd.storagerouter
                service.ports = [mds_id]
                service.type = service_types['MetadataServer']
                service.save()
                services[s_id] = service
                mds_service = MDSService()
                mds_service.service = service
                mds_service.number = 0
                mds_service.capacity = 10
                mds_service.vpool = sd.vpool
                mds_service.save()
                mds_services[mds_id] = mds_service
                StorageDriverController.add_storagedriverpartition(
                    sd, {
                        'size': None,
                        'role': DiskPartition.ROLES.DB,
                        'sub_role': StorageDriverPartition.SUBROLE.MDS,
                        'partition': sd.storagerouter.disks[0].partitions[0],
                        'mds_service': mds_service
                    })
        # vDisks: the volume is created through the vPool's StorageRouterClient before the vDisk is modeled
        for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get(
                'vdisks', ()):
            if vdisk_id not in vdisks:
                vpool = vpools[vpool_id]
                devicename = 'vdisk_{0}'.format(vdisk_id)
                # An mds_id of None results in an MDS backend config without any MDS service
                mds_backend_config = DalHelper.generate_mds_metadata_backend_config(
                    [] if mds_id is None else [mds_services[mds_id]])
                volume_id = srclients[vpool_id].create_volume(
                    devicename, mds_backend_config, 0, str(storage_driver_id))
                vdisk = VDisk()
                vdisk.name = str(vdisk_id)
                vdisk.devicename = devicename
                vdisk.volume_id = volume_id
                vdisk.vpool = vpool
                vdisk.size = 0
                vdisk.save()
                vdisk.reload_client('storagedriver')
                vdisks[vdisk_id] = vdisk
        # Junctions linking StorageRouters to Domains
        for srd_id, sr_id, domain_id, backup in structure.get(
                'storagerouter_domains', ()):
            if srd_id not in storagerouter_domains:
                sr_domain = StorageRouterDomain()
                sr_domain.backup = backup
                sr_domain.domain = domains[domain_id]
                sr_domain.storagerouter = storagerouters[sr_id]
                sr_domain.save()
                storagerouter_domains[srd_id] = sr_domain
        return {
            'vdisks': vdisks,
            'vpools': vpools,
            'domains': domains,
            'services': services,
            'mds_services': mds_services,
            'service_types': service_types,
            'storagerouters': storagerouters,
            'storagedrivers': storagedrivers,
            'storagerouter_domains': storagerouter_domains
        }
    def test_node_config_checkup(self):
        """
        Validates correct working of cluster registry checkup
        Three consecutive checkups are verified:
        * The initial run, which is expected to report changes and to record a node config for '1' and '2'
        * A second run, which is expected to report no changes and to record nothing
        * A run after linking StorageRouter 1 to domain 2 while an exception is registered for
          'server_revision' on host 1, which is expected to record only '2' and to yield INFINITE distances
        """
        # Expected node config per StorageDriver; the node distance map is filled in per validation
        base_structure = {
            '1': {'vrouter_id': '1',
                  'message_host': '10.0.1.1',
                  'message_port': 1,
                  'xmlrpc_host': '10.0.0.1',
                  'xmlrpc_port': 2,
                  'failovercache_host': '10.0.1.1',
                  'failovercache_port': 3,
                  'network_server_uri': 'tcp://10.0.1.1:4',
                  'node_distance_map': None},
            '2': {'vrouter_id': '2',
                  'message_host': '10.0.1.2',
                  'message_port': 1,
                  'xmlrpc_host': '10.0.0.2',
                  'xmlrpc_port': 2,
                  'failovercache_host': '10.0.1.2',
                  'failovercache_port': 3,
                  'network_server_uri': 'tcp://10.0.1.2:4',
                  'node_distance_map': None}
        }

        def _assert_node_config(node_config, distance_maps):
            # Compare a single registered node config with the expectation for its vrouter id
            node_id = node_config.vrouter_id
            wanted = copy.deepcopy(base_structure[node_id])
            wanted['node_distance_map'] = distance_maps[node_id]
            found = {'vrouter_id': node_id,
                     'message_host': node_config.message_host,
                     'message_port': node_config.message_port,
                     'xmlrpc_host': node_config.xmlrpc_host,
                     'xmlrpc_port': node_config.xmlrpc_port,
                     'failovercache_host': node_config.failovercache_host,
                     'failovercache_port': node_config.failovercache_port,
                     'network_server_uri': node_config.network_server_uri,
                     'node_distance_map': node_config.node_distance_map}
            self.assertDictEqual(wanted, found)

        structure = DalHelper.build_dal_structure(
            {'vpools': [1],
             'domains': [1, 2],
             'storagerouters': [1, 2],
             'storagedrivers': [(1, 1, 1), (2, 1, 2)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        )
        storagerouters = structure['storagerouters']
        vpool = structure['vpools'][1]
        installer = ArakoonInstaller(cluster_name='voldrv')
        installer.create_cluster(cluster_type=ServiceType.ARAKOON_CLUSTER_TYPES.SD,
                                 ip=storagerouters[1].ip,
                                 base_dir='/tmp')

        def _checkup_and_validate(changes, recorded_ids, distance_maps):
            # Run a single checkup and verify its result, the recorded node configs and the registry contents
            StorageRouterClient.node_config_recordings = []
            outcome = StorageDriverController.cluster_registry_checkup()
            self.assertDictEqual(outcome,
                                 {vpool.guid: {'success': True,
                                               'changes': changes}})
            self.assertListEqual(sorted(StorageRouterClient.node_config_recordings),
                                 recorded_ids)
            for node_config in vpool.clusterregistry_client.get_node_configs():
                _assert_node_config(node_config, distance_maps)

        near_maps = {'1': {'2': StorageDriver.DISTANCES.NEAR},
                     '2': {'1': StorageDriver.DISTANCES.NEAR}}
        infinite_maps = {'1': {'2': StorageDriver.DISTANCES.INFINITE},
                         '2': {'1': StorageDriver.DISTANCES.INFINITE}}

        # Initial run, it will now be configured
        _checkup_and_validate(changes=True, recorded_ids=['1', '2'], distance_maps=near_maps)

        # Running it again should not change anything
        _checkup_and_validate(changes=False, recorded_ids=[], distance_maps=near_maps)

        # Validate some error paths
        other_domain = structure['domains'][2]
        junction = structure['storagerouters'][1].domains[0]
        junction.domain = other_domain
        junction.save()
        vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(vpool.guid)
        StorageRouterClient.exceptions['server_revision'] = {vpool_config_path: Exception('ClusterNotReachableException')}
        _checkup_and_validate(changes=True, recorded_ids=['2'], distance_maps=infinite_maps)
示例#6
0
    def build_service_structure(structure, previous_structure=None):
        """
        Builds an MDS service structure
        Objects whose id is already present in previous_structure are reused, every other requested object
        is created and saved
        Example:
            structure = Helper.build_service_structure(
                {'vpools': [1],
                 'domains': [],
                 'storagerouters': [1],
                 'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
                 'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
                 'vdisks': [(1, 1, 1, 1)],  # (<id>, <storagedriver_id>, <vpool_id>, <mds_service_id or None>)
                 'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
            )
        :param structure: Description of the objects to model, keyed by object type (see example)
        :type structure: dict
        :param previous_structure: Result of an earlier call; its containers are extended in place
        :type previous_structure: dict
        :return: The modeled objects, grouped per object type
        :rtype: dict
        """
        known = {} if previous_structure is None else previous_structure
        vdisks = known.get("vdisks", {})
        vpools = known.get("vpools", {})
        domains = known.get("domains", {})
        services = known.get("services", {})
        mds_services = known.get("mds_services", {})
        storagerouters = known.get("storagerouters", {})
        storagedrivers = known.get("storagedrivers", {})
        storagerouter_domains = known.get("storagerouter_domains", {})

        # The MetadataServer service type is modeled on first use
        service_type = ServiceTypeList.get_by_name("MetadataServer")
        if service_type is None:
            service_type = ServiceType()
            service_type.name = "MetadataServer"
            service_type.save()

        # Domains
        for domain_id in structure.get("domains", []):
            if domain_id in domains:
                continue
            new_domain = Domain()
            new_domain.name = "domain_{0}".format(domain_id)
            new_domain.save()
            domains[domain_id] = new_domain

        # vPools, each of them (new or reused) gets a StorageRouterClient
        srclients = {}
        for vpool_id in structure.get("vpools", []):
            if vpool_id in vpools:
                current_vpool = vpools[vpool_id]
            else:
                current_vpool = VPool()
                current_vpool.name = str(vpool_id)
                current_vpool.status = "RUNNING"
                current_vpool.save()
                vpools[vpool_id] = current_vpool
            srclients[vpool_id] = StorageRouterClient(current_vpool.guid, None)

        # StorageRouters, each new one with 1 disk holding 1 partition with the DB and SCRUB roles
        for sr_id in structure.get("storagerouters", []):
            if sr_id in storagerouters:
                continue
            new_router = StorageRouter()
            new_router.name = str(sr_id)
            new_router.ip = "10.0.0.{0}".format(sr_id)
            new_router.rdma_capable = False
            new_router.node_type = "MASTER"
            new_router.machine_id = str(sr_id)
            new_router.save()
            storagerouters[sr_id] = new_router
            new_disk = Disk()
            new_disk.storagerouter = new_router
            new_disk.state = "OK"
            new_disk.name = "/dev/uda"
            new_disk.size = 1 * 1024 ** 4
            new_disk.is_ssd = True
            new_disk.aliases = ["/dev/uda"]
            new_disk.save()
            new_partition = DiskPartition()
            new_partition.offset = 0
            new_partition.size = new_disk.size
            new_partition.aliases = ["/dev/uda-1"]
            new_partition.state = "OK"
            new_partition.mountpoint = "/tmp/unittest/sr_{0}/disk_1/partition_1".format(sr_id)
            new_partition.disk = new_disk
            new_partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            new_partition.save()

        # StorageDrivers
        for sd_id, vpool_id, sr_id in structure.get("storagedrivers", ()):
            if sd_id in storagedrivers:
                continue
            new_driver = StorageDriver()
            new_driver.vpool = vpools[vpool_id]
            owning_router = storagerouters[sr_id]
            new_driver.storagerouter = owning_router
            new_driver.name = str(sd_id)
            new_driver.mountpoint = "/"
            new_driver.cluster_ip = owning_router.ip
            new_driver.storage_ip = "10.0.1.{0}".format(sr_id)
            new_driver.storagedriver_id = str(sd_id)
            new_driver.ports = {"management": 1, "xmlrpc": 2, "dtl": 3, "edge": 4}
            new_driver.save()
            storagedrivers[sd_id] = new_driver
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id], storagedriver=new_driver)

        # MDS services, including their Service and their StorageDriver partition
        for mds_id, sd_id in structure.get("mds_services", ()):
            if mds_id in mds_services:
                continue
            driver = storagedrivers[sd_id]
            service_name = "{0}-{1}".format(driver.storagerouter.name, mds_id)
            new_service = Service()
            new_service.name = service_name
            new_service.storagerouter = driver.storagerouter
            new_service.ports = [mds_id]
            new_service.type = service_type
            new_service.save()
            services[service_name] = new_service
            new_mds = MDSService()
            new_mds.service = new_service
            new_mds.number = 0
            new_mds.capacity = 10
            new_mds.vpool = driver.vpool
            new_mds.save()
            mds_services[mds_id] = new_mds
            partition_info = {
                "size": None,
                "role": DiskPartition.ROLES.DB,
                "sub_role": StorageDriverPartition.SUBROLE.MDS,
                "partition": driver.storagerouter.disks[0].partitions[0],
                "mds_service": new_mds,
            }
            StorageDriverController.add_storagedriverpartition(driver, partition_info)

        # vDisks, the volume itself is created through the StorageRouterClient of the vPool
        for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get("vdisks", ()):
            if vdisk_id in vdisks:
                continue
            owning_vpool = vpools[vpool_id]
            devicename = "vdisk_{0}".format(vdisk_id)
            if mds_id is None:
                backend_services = []
            else:
                backend_services = [mds_services[mds_id]]
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig(backend_services)
            volume_id = srclients[vpool_id].create_volume(devicename, mds_backend_config, 0, str(storage_driver_id))
            new_vdisk = VDisk()
            new_vdisk.name = str(vdisk_id)
            new_vdisk.devicename = devicename
            new_vdisk.volume_id = volume_id
            new_vdisk.vpool = owning_vpool
            new_vdisk.size = 0
            new_vdisk.save()
            new_vdisk.reload_client("storagedriver")
            vdisks[vdisk_id] = new_vdisk

        # Junctions between StorageRouters and Domains
        for srd_id, sr_id, domain_id, backup in structure.get("storagerouter_domains", ()):
            if srd_id in storagerouter_domains:
                continue
            junction = StorageRouterDomain()
            junction.backup = backup
            junction.domain = domains[domain_id]
            junction.storagerouter = storagerouters[sr_id]
            junction.save()
            storagerouter_domains[srd_id] = junction

        return {
            "vdisks": vdisks,
            "vpools": vpools,
            "domains": domains,
            "services": services,
            "service_type": service_type,
            "mds_services": mds_services,
            "storagerouters": storagerouters,
            "storagedrivers": storagedrivers,
            "storagerouter_domains": storagerouter_domains,
        }
示例#7
0
    def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
        """
        Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files

        Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
        configuration regarding both is completed.
        :param storagerouter: Storagerouter on which MDS service will be created
        :type storagerouter: StorageRouter

        :param vpool: The vPool for which the MDS service will be created
        :type vpool: VPool

        :param fresh_only: If True, a new MDS service is only created when no MDS service exists yet for this vpool on this storagerouter
        :type fresh_only: bool

        :param reload_config: If True, the volumedriver's updated configuration will be reloaded
        :type reload_config: bool

        :raises RuntimeError: When no free port, no DB partition or no configured storagedriver can be found

        :return: Newly created service, or None when fresh_only is True and an MDS service already exists
        :rtype: MDSService or None
        """
        # Fetch service sequence number based on MDS services for current vPool and current storage router
        service_number = -1
        for existing_mds_service in vpool.mds_services:
            if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
                service_number = max(existing_mds_service.number, service_number)

        if fresh_only is True and service_number >= 0:
            return None  # There is already 1 or more MDS services running, aborting

        # VALIDATIONS
        # 1. Find free port based on MDS services for all vPools on current storage router
        client = SSHClient(storagerouter)
        mdsservice_type = ServiceTypeList.get_by_name(
            ServiceType.SERVICE_TYPES.MD_SERVER)
        occupied_ports = []
        for existing_service in mdsservice_type.services:
            if existing_service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(existing_service.ports)

        mds_port_range = Configuration.get(
            '/ovs/framework/hosts/{0}/ports|mds'.format(
                System.get_my_machine_id(client)))
        free_ports = System.get_free_ports(selected_range=mds_port_range,
                                           exclude=occupied_ports,
                                           nr=1,
                                           client=client)
        if not free_ports:
            raise RuntimeError(
                'Failed to find an available port on storage router {0} within range {1}'
                .format(storagerouter.name, mds_port_range))

        # 2. Partition check
        # NOTE(review): the break only leaves the inner loop, so with DB partitions on several disks the
        # partition of the last such disk is used. Kept as is to not change the partition selection
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError(
                'Could not find DB partition on storage router {0}'.format(
                    storagerouter.name))

        # 3. Verify storage driver configured
        storagedrivers = [
            sd for sd in vpool.storagedrivers
            if sd.storagerouter_guid == storagerouter.guid
        ]
        if not storagedrivers:
            raise RuntimeError(
                'Expected to find a configured storagedriver for vpool {0} on storage router {1}'
                .format(vpool.name, storagerouter.name))
        storagedriver = storagedrivers[0]

        # MODEL UPDATES
        # 1. Service
        service_number += 1
        service = Service()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name,
                                                       service_number)
        service.type = mdsservice_type
        service.ports = [free_ports[0]]
        service.storagerouter = storagerouter
        service.save()
        mds_service = MDSService()
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # 2. Storage driver partitions
        # Imported locally, presumably to avoid a circular import at module level - TODO confirm
        from ovs.lib.storagedriver import StorageDriverController
        StorageDriverController.add_storagedriverpartition(
            storagedriver, {
                'size': None,
                'role': DiskPartition.ROLES.DB,
                'sub_role': StorageDriverPartition.SUBROLE.MDS,
                'partition': db_partition,
                'mds_service': mds_service
            })

        # CONFIGURATIONS
        # 1. Volumedriver
        # The loop uses its own variable names: reusing 'mds_service' here would overwrite the newly created
        # MDS service, causing an arbitrary (or no) MDS service of this storage router to be returned
        mds_nodes = []
        for current_service in mdsservice_type.services:
            if current_service.storagerouter_guid != storagerouter.guid:
                continue
            current_mds_service = current_service.mds_service
            if current_mds_service is None or current_mds_service.vpool_guid != vpool.guid:
                continue
            sdp = [
                sd_partition for sd_partition in
                current_mds_service.storagedriver_partitions
                if sd_partition.role == DiskPartition.ROLES.DB
                and sd_partition.sub_role ==
                StorageDriverPartition.SUBROLE.MDS
            ][0]
            mds_nodes.append({
                'host': current_service.storagerouter.ip,
                'port': current_service.ports[0],
                'db_directory': sdp.path,
                'scratch_directory': sdp.path
            })

        # Generate the correct section in the Storage Driver's configuration
        storagedriver_config = StorageDriverConfiguration(
            'storagedriver', vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
        storagedriver_config.save(client, reload_config=reload_config)

        return mds_service
 def voldrv_arakoon_checkup():
     """
     Run the scheduled voldrv arakoon checkup task
     Delegates to StorageDriverController.scheduled_voldrv_arakoon_checkup
     :return: None
     """
     # No API available
     StorageDriverController.scheduled_voldrv_arakoon_checkup()
示例#9
0
    def build_service_structure(structure, previous_structure=None):
        """
        Builds (or extends) an MDS service structure made of model objects

        Objects are only created for IDs which are not yet known in 'previous_structure'. The dicts taken from
        'previous_structure' are extended in place and are part of the returned result.
        Supported keys in 'structure' (all of them optional):
            * vpools:                [<id>]
            * domains:               [<id>]
            * storagerouters:        [<id>]
            * storagedrivers:        [(<id>, <vpool_id>, <storagerouter_id>)]
            * mds_services:          [(<id>, <storagedriver_id>)]
            * vdisks:                [(<id>, <storagedriver_id>, <vpool_id>, <mds_service_id or None>)]
            * storagerouter_domains: [(<id>, <storagerouter_id>, <domain_id>, <backup>)]
        Example:
            structure = Helper.build_service_structure(
                {'vpools': [1],
                 'domains': [],
                 'storagerouters': [1],
                 'storagedrivers': [(1, 1, 1)],  # (<id>, <vpool_id>, <storagerouter_id>)
                 'mds_services': [(1, 1)],  # (<id>, <storagedriver_id>)
                 'storagerouter_domains': []}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
            )
        :param structure: Description of the objects which should exist
        :type structure: dict
        :param previous_structure: Result of an earlier call which should be extended
        :type previous_structure: dict
        :return: All created and re-used objects grouped per kind and keyed by the ID used in 'structure'
                 ('services' is keyed by service name, 'service_type' holds the 'MetadataServer' ServiceType)
        :rtype: dict
        """
        known = {} if previous_structure is None else previous_structure
        vdisks = known.get('vdisks', {})
        vpools = known.get('vpools', {})
        domains = known.get('domains', {})
        services = known.get('services', {})
        mds_services = known.get('mds_services', {})
        storagerouters = known.get('storagerouters', {})
        storagedrivers = known.get('storagedrivers', {})
        storagerouter_domains = known.get('storagerouter_domains', {})

        # Make sure the 'MetadataServer' ServiceType exists
        mds_type = ServiceTypeList.get_by_name('MetadataServer')
        if mds_type is None:
            mds_type = ServiceType()
            mds_type.name = 'MetadataServer'
            mds_type.save()

        # Domains
        for domain_id in structure.get('domains', []):
            if domain_id in domains:
                continue
            new_domain = Domain()
            new_domain.name = 'domain_{0}'.format(domain_id)
            new_domain.save()
            domains[domain_id] = new_domain

        # vPools, every requested vPool (new or re-used) gets a StorageRouterClient
        srclients = {}
        for vpool_id in structure.get('vpools', []):
            if vpool_id not in vpools:
                new_vpool = VPool()
                new_vpool.name = str(vpool_id)
                new_vpool.status = 'RUNNING'
                new_vpool.save()
                vpools[vpool_id] = new_vpool
            srclients[vpool_id] = StorageRouterClient(vpools[vpool_id].guid, None)

        # StorageRouters, each with a single SSD holding one partition with the DB and SCRUB roles
        for sr_id in structure.get('storagerouters', []):
            if sr_id in storagerouters:
                continue
            new_router = StorageRouter()
            new_router.name = str(sr_id)
            new_router.ip = '10.0.0.{0}'.format(sr_id)
            new_router.rdma_capable = False
            new_router.node_type = 'MASTER'
            new_router.machine_id = str(sr_id)
            new_router.save()
            storagerouters[sr_id] = new_router

            ssd = Disk()
            ssd.storagerouter = new_router
            ssd.state = 'OK'
            ssd.name = '/dev/uda'
            ssd.size = 1 * 1024**4
            ssd.is_ssd = True
            ssd.aliases = ['/dev/uda']
            ssd.save()

            ssd_partition = DiskPartition()
            ssd_partition.offset = 0
            ssd_partition.size = ssd.size
            ssd_partition.aliases = ['/dev/uda-1']
            ssd_partition.state = 'OK'
            ssd_partition.mountpoint = '/tmp/unittest/sr_{0}/disk_1/partition_1'.format(sr_id)
            ssd_partition.disk = ssd
            ssd_partition.roles = [DiskPartition.ROLES.DB, DiskPartition.ROLES.SCRUB]
            ssd_partition.save()

        # StorageDrivers
        for sd_id, vpool_id, sr_id in structure.get('storagedrivers', ()):
            if sd_id in storagedrivers:
                continue
            new_driver = StorageDriver()
            new_driver.vpool = vpools[vpool_id]
            new_driver.storagerouter = storagerouters[sr_id]
            new_driver.name = str(sd_id)
            new_driver.mountpoint = '/'
            new_driver.cluster_ip = storagerouters[sr_id].ip
            new_driver.storage_ip = '10.0.1.{0}'.format(sr_id)
            new_driver.storagedriver_id = str(sd_id)
            new_driver.ports = {'management': 1,
                                'xmlrpc': 2,
                                'dtl': 3,
                                'edge': 4}
            new_driver.save()
            storagedrivers[sd_id] = new_driver
            Helper._set_vpool_storage_driver_configuration(vpool=vpools[vpool_id],
                                                           storagedriver=new_driver)

        # MDS services: a Service, its MDSService junction and the DB/MDS StorageDriverPartition
        for mds_id, sd_id in structure.get('mds_services', ()):
            if mds_id in mds_services:
                continue
            owning_driver = storagedrivers[sd_id]
            service_name = '{0}-{1}'.format(owning_driver.storagerouter.name, mds_id)
            new_service = Service()
            new_service.name = service_name
            new_service.storagerouter = owning_driver.storagerouter
            new_service.ports = [mds_id]
            new_service.type = mds_type
            new_service.save()
            services[service_name] = new_service

            new_mds = MDSService()
            new_mds.service = new_service
            new_mds.number = 0
            new_mds.capacity = 10
            new_mds.vpool = owning_driver.vpool
            new_mds.save()
            mds_services[mds_id] = new_mds

            partition_info = {'size': None,
                              'role': DiskPartition.ROLES.DB,
                              'sub_role': StorageDriverPartition.SUBROLE.MDS,
                              'partition': owning_driver.storagerouter.disks[0].partitions[0],
                              'mds_service': new_mds}
            StorageDriverController.add_storagedriverpartition(owning_driver, partition_info)

        # vDisks, the volume is created through the StorageRouterClient of the vPool first
        for vdisk_id, storage_driver_id, vpool_id, mds_id in structure.get('vdisks', ()):
            if vdisk_id in vdisks:
                continue
            owning_vpool = vpools[vpool_id]
            devicename = 'vdisk_{0}'.format(vdisk_id)
            mds_backend_config = Helper._generate_mdsmetadatabackendconfig(
                [] if mds_id is None else [mds_services[mds_id]])
            volume_id = srclients[vpool_id].create_volume(devicename,
                                                          mds_backend_config,
                                                          0,
                                                          str(storage_driver_id))
            new_vdisk = VDisk()
            new_vdisk.name = str(vdisk_id)
            new_vdisk.devicename = devicename
            new_vdisk.volume_id = volume_id
            new_vdisk.vpool = owning_vpool
            new_vdisk.size = 0
            new_vdisk.save()
            new_vdisk.reload_client('storagedriver')
            vdisks[vdisk_id] = new_vdisk

        # Junctions between StorageRouters and Domains
        for srd_id, sr_id, domain_id, backup in structure.get('storagerouter_domains', ()):
            if srd_id in storagerouter_domains:
                continue
            junction = StorageRouterDomain()
            junction.backup = backup
            junction.domain = domains[domain_id]
            junction.storagerouter = storagerouters[sr_id]
            junction.save()
            storagerouter_domains[srd_id] = junction

        return {'vdisks': vdisks,
                'vpools': vpools,
                'domains': domains,
                'services': services,
                'service_type': mds_type,
                'mds_services': mds_services,
                'storagerouters': storagerouters,
                'storagedrivers': storagedrivers,
                'storagerouter_domains': storagerouter_domains}
示例#10
0
    def prepare_mds_service(client, storagerouter, vpool, fresh_only=True, reload_config=False):
        """
        Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files

        Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
        configuration regarding both is completed.

        :param client: Client used to look up a free port and to load/save the StorageDriver configuration
        :param storagerouter: StorageRouter on which the MDS service will be created
        :param vpool: vPool for which the MDS service will be created
        :param fresh_only: When True, nothing is created if the StorageRouter already runs an MDS service for the vPool
        :type fresh_only: bool
        :param reload_config: Passed on when saving the StorageDriver configuration
        :type reload_config: bool
        :raises IndexError: The vPool has no StorageDriver on the StorageRouter
        :raises RuntimeError: No partition with the DB role or no partition with the SCRUB role on the StorageRouter
        :return: The newly created MDS service, None when 'fresh_only' is True and an MDS service already exists
        """
        from ovs.lib.storagedriver import StorageDriverController

        mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
        storagedriver = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid][0]

        # Fetch service sequence number
        service_number = -1
        for existing_mds_service in vpool.mds_services:
            if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
                service_number = max(existing_mds_service.number, service_number)

        if fresh_only is True and service_number >= 0:
            return None  # There are already one or more MDS services running, aborting
        service_number += 1

        # Locate the required partitions BEFORE anything is persisted, so a failure does not leave a Service and
        # MDSService behind in the model. When several partitions carry a role, the last one found is used.
        scrub_partition = None
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                if DiskPartition.ROLES.SCRUB in partition.roles:
                    scrub_partition = partition
        if scrub_partition is None or db_partition is None:
            raise RuntimeError('Could not find DB or SCRUB partition on StorageRouter {0}'.format(storagerouter.name))

        # Find free port
        occupied_ports = [existing_service.ports[0] for existing_service in mdsservice_type.services
                          if existing_service.storagerouter_guid == storagerouter.guid]
        port = System.get_free_ports(Configuration.get('ovs.ports.mds'),
                                     exclude=occupied_ports, nr=1, client=client)[0]

        # Add service to the model
        service = DalService()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
        service.type = mdsservice_type
        service.storagerouter = storagerouter
        service.ports = [port]
        service.save()
        mds_service = MDSService()
        mds_service.service = service
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.save()
        StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                           'role': DiskPartition.ROLES.DB,
                                                                           'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                           'partition': db_partition,
                                                                           'mds_service': mds_service})
        StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                           'role': DiskPartition.ROLES.SCRUB,
                                                                           'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                           'partition': scrub_partition,
                                                                           'mds_service': mds_service})

        # Collect every MDS service of this vPool on this StorageRouter.
        # Dedicated loop names are used on purpose: re-using 'service' / 'mds_service' here would overwrite the
        # freshly created objects and make this method return whichever MDS service happened to be iterated last.
        mds_nodes = []
        for node_service in mdsservice_type.services:
            if node_service.storagerouter_guid != storagerouter.guid:
                continue
            node_mds_service = node_service.mds_service
            if node_mds_service.vpool_guid != vpool.guid:
                continue
            db_directory = [sd_partition.path for sd_partition in node_mds_service.storagedriver_partitions
                            if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
            scratch_directory = [sd_partition.path for sd_partition in node_mds_service.storagedriver_partitions
                                 if sd_partition.role == DiskPartition.ROLES.SCRUB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS][0]
            mds_nodes.append({'host': node_service.storagerouter.ip,
                              'port': node_service.ports[0],
                              'db_directory': db_directory,
                              'scratch_directory': scratch_directory})

        # Generate the correct section in the Storage Driver's configuration
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
        storagedriver_config.load(client)
        storagedriver_config.clean()  # Clean out obsolete values
        storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
        storagedriver_config.save(client, reload_config=reload_config)

        return mds_service
示例#11
0
    def configure_storagedriver_service(self):
        """
        Assemble every section of the StorageDriver configuration and save it via the root client of the
        StorageRouter installer
        :raises RuntimeError: - When no StorageRouterInstaller instance is set
                              - When no write caches (StorageDriverPartition junctions) have been created yet
        :return: None
        :rtype: NoneType
        """
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')
        if len(self.write_caches) == 0:
            raise RuntimeError('The StorageDriverPartition junctions have not been created yet')

        vpool = self.vp_installer.vpool

        def _amqp_uris():
            # One AMQP URI per master StorageRouter, built from the message queue settings in the configuration
            user = Configuration.get('/ovs/framework/messagequeue|user')
            protocol = Configuration.get('/ovs/framework/messagequeue|protocol')
            password = Configuration.get('/ovs/framework/messagequeue|password')
            uris = []
            for master in StorageRouterList.get_masters():
                uris.append({'amqp_uri': '{0}://{1}:{2}@{3}:5672'.format(protocol, user, password, master.ip)})
            return uris

        def _filesystem_settings():
            # Keyword arguments for the 'filesystem' section, the DTL keys depend on the requested DTL mode
            settings = {'fs_dtl_host': '',
                        'fs_enable_shm_interface': 0,
                        'fs_enable_network_interface': 1,
                        'fs_metadata_backend_arakoon_cluster_nodes': [],
                        'fs_metadata_backend_mds_nodes': [],
                        'fs_metadata_backend_type': 'MDS',
                        'fs_virtual_disk_format': 'raw',
                        'fs_raw_disk_suffix': '.raw',
                        'fs_file_event_rules': [{'fs_file_event_rule_calls': ['Rename'],
                                                 'fs_file_event_rule_path_regex': '.*'}]}
            if self.dtl_mode == StorageDriverClient.FRAMEWORK_DTL_NO_SYNC:
                settings['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_MANUAL_MODE
                return settings
            settings['fs_dtl_mode'] = StorageDriverClient.VPOOL_DTL_MODE_MAP[self.dtl_mode]
            settings['fs_dtl_config_mode'] = StorageDriverClient.VOLDRV_DTL_AUTOMATIC_MODE
            return settings

        def _backend_connection_settings():
            # Keyword arguments for the 'backend_connection_manager' section: the generic retry settings plus
            # one numbered ALBA entry per proxy of this StorageDriver, ordered by proxy port
            settings = {'backend_type': 'MULTI',
                        'backend_interface_retries_on_error': 5,
                        'backend_interface_retry_interval_secs': 1,
                        'backend_interface_retry_backoff_multiplier': 2.0}
            ordered_proxies = sorted(self.storagedriver.alba_proxies, key=lambda alba_proxy: alba_proxy.service.ports[0])
            for position, alba_proxy in enumerate(ordered_proxies):
                settings[str(position)] = {'alba_connection_host': self.storagedriver.storage_ip,
                                           'alba_connection_port': alba_proxy.service.ports[0],
                                           'alba_connection_preset': vpool.metadata['backend']['backend_info']['preset'],
                                           'alba_connection_timeout': 30,
                                           'alba_connection_use_rora': True,
                                           'alba_connection_transport': 'TCP',
                                           'alba_connection_rora_manifest_cache_capacity': 25000,
                                           'alba_connection_asd_connection_pool_capacity': 10,
                                           'alba_connection_rora_timeout_msecs': 50,
                                           'backend_type': 'ALBA'}
            return settings

        gaps = StorageDriverController.calculate_trigger_and_backoff_gap(cache_size=self.sr_installer.smallest_write_partition_size)
        voldrv_cluster = str(Configuration.get('/ovs/framework/arakoon_clusters|voldrv'))
        voldrv_nodes = []
        for arakoon_node in ArakoonClusterConfig(cluster_id=voldrv_cluster).nodes:
            voldrv_nodes.append({'host': arakoon_node.ip,
                                 'port': arakoon_node.client_port,
                                 'node_id': arakoon_node.name})

        sd_config = StorageDriverConfiguration(vpool.guid, self.storagedriver.storagedriver_id)

        # SCO cache, the gaps are passed in human readable form
        scocache_mount_points = self.write_caches
        trigger_gap = ExtensionsToolbox.convert_byte_size_to_human_readable(size=gaps['trigger'])
        backoff_gap = ExtensionsToolbox.convert_byte_size_to_human_readable(size=gaps['backoff'])
        sd_config.configure_scocache(scocache_mount_points=scocache_mount_points,
                                     trigger_gap=trigger_gap,
                                     backoff_gap=backoff_gap)

        # File driver
        sd_config.configure_file_driver(fd_cache_path=self.storagedriver_partition_file_driver.path,
                                        fd_extent_cache_capacity='1024',
                                        fd_namespace='fd-{0}-{1}'.format(vpool.name, vpool.guid))

        # Volume router
        vrouter_id = self.storagedriver.storagedriver_id
        sco_multiplier = self.sco_size * 1024 / self.cluster_size
        sd_config.configure_volume_router(vrouter_id=vrouter_id,
                                          vrouter_redirect_timeout_ms='120000',
                                          vrouter_keepalive_time_secs='15',
                                          vrouter_keepalive_interval_secs='5',
                                          vrouter_keepalive_retries='2',
                                          vrouter_routing_retries=10,
                                          vrouter_volume_read_threshold=0,
                                          vrouter_volume_write_threshold=0,
                                          vrouter_file_read_threshold=0,
                                          vrouter_file_write_threshold=0,
                                          vrouter_min_workers=4,
                                          vrouter_max_workers=16,
                                          vrouter_sco_multiplier=sco_multiplier,
                                          vrouter_backend_sync_timeout_ms=60000,
                                          vrouter_migrate_timeout_ms=60000,
                                          vrouter_use_fencing=True)

        # Volume manager
        sd_config.configure_volume_manager(tlog_path=self.storagedriver_partition_tlogs.path,
                                           metadata_path=self.storagedriver_partition_metadata.path,
                                           clean_interval=1,
                                           dtl_throttle_usecs=4000,
                                           default_cluster_size=self.cluster_size * 1024,
                                           number_of_scos_in_tlog=self.tlog_multiplier,
                                           non_disposable_scos_factor=float(self.write_buffer) / self.tlog_multiplier / self.sco_size)

        # Event publisher
        routing_key = Configuration.get('/ovs/framework/messagequeue|queues.storagedriver')
        sd_config.configure_event_publisher(events_amqp_routing_key=routing_key,
                                            events_amqp_uris=_amqp_uris())

        # Sections depending on the voldrv Arakoon cluster and other fixed settings
        sd_config.configure_volume_registry(vregistry_arakoon_cluster_id=voldrv_cluster,
                                            vregistry_arakoon_cluster_nodes=voldrv_nodes)
        sd_config.configure_network_interface(network_max_neighbour_distance=StorageDriver.DISTANCES.FAR - 1)
        sd_config.configure_threadpool_component(num_threads=16)
        sd_config.configure_volume_router_cluster(vrouter_cluster_id=vpool.guid)
        sd_config.configure_distributed_lock_store(dls_type='Arakoon',
                                                   dls_arakoon_cluster_id=voldrv_cluster,
                                                   dls_arakoon_cluster_nodes=voldrv_nodes)
        sd_config.configure_content_addressed_cache(serialize_read_cache=False,
                                                    read_cache_serialization_path=[])
        sd_config.configure_distributed_transaction_log(dtl_path=self.storagedriver_partition_dtl.path,  # Not used, but required
                                                        dtl_transport=StorageDriverClient.VPOOL_DTL_TRANSPORT_MAP[self.dtl_transport])

        # Sections which are generated by the local helpers
        sd_config.configure_filesystem(**_filesystem_settings())
        sd_config.configure_backend_connection_manager(**_backend_connection_settings())

        sd_config.save(client=self.sr_installer.root_client)
示例#12
0
    def create_partitions(self):
        """
        Create all junctions between the StorageDriver and the DiskPartitions it uses (StorageDriverPartitions)
        and create the related directories through the root client of the StorageRouter installer

        Nothing is returned, the outcome is stored on this instance and on the StorageRouter installer:
            * self.write_caches, self.cache_size_local, self.storagedriver_partitions_caches
            * self.storagedriver_partition_file_driver / _tlogs / _metadata / _dtl
            * sr_installer.created_dirs and sr_installer.smallest_write_partition_size
        :raises RuntimeError: - When no StorageDriver has been created yet
                              - When no StorageRouterInstaller instance is set
        :raises ValueError: When a local block or fragment cache is requested, but no local cache size was calculated
        :return: None
        :rtype: NoneType
        """
        if self.storagedriver is None:
            raise RuntimeError('A StorageDriver needs to be created first')
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')

        installer = self.sr_installer

        def _add_junction(**partition_info):
            # The keyword arguments are handed over unchanged as the 'partition_info' dict
            return StorageDriverController.add_storagedriverpartition(storagedriver=self.storagedriver,
                                                                      partition_info=partition_info)

        # Assign WRITE / Fragment cache
        for write_info in installer.write_partitions:
            free_space = write_info['available']
            partition = DiskPartition(write_info['guid'])
            # Every write partition gets a share of the requested buffer proportional to its available space
            share = free_space * 100.0 / installer.global_write_buffer_available_size
            usable_size = share * installer.global_write_buffer_requested_size / 100
            sco_fraction = 0.98
            # At least 1 local proxy has been requested either for fragment or block cache
            hosts_local_caches = installer.requested_local_proxies > 0 and partition == installer.largest_write_partition
            if hosts_local_caches:
                self.cache_size_local = int(usable_size * 0.10)  # Bytes
                sco_fraction = 0.88
                for _ in xrange(installer.requested_proxies):
                    cache_junction = _add_junction(size=None,
                                                   role=DiskPartition.ROLES.WRITE,
                                                   sub_role=StorageDriverPartition.SUBROLE.FCACHE,
                                                   partition=partition)
                    installer.created_dirs.append(cache_junction.path)
                    if self.block_cache_local is True:
                        installer.created_dirs.append('{0}/bc'.format(cache_junction.path))
                    if self.fragment_cache_local is True:
                        installer.created_dirs.append('{0}/fc'.format(cache_junction.path))
                    self.storagedriver_partitions_caches.append(cache_junction)

            # SCO cache size in KiB, rounded down to a multiple of 4096
            sco_size_kib = int(usable_size * sco_fraction / 1024 / 4096) * 4096
            sco_junction = _add_junction(size=long(usable_size),
                                         role=DiskPartition.ROLES.WRITE,
                                         sub_role=StorageDriverPartition.SUBROLE.SCO,
                                         partition=partition)
            self.write_caches.append({'path': sco_junction.path,
                                      'size': '{0}KiB'.format(sco_size_kib)})
            installer.created_dirs.append(sco_junction.path)
            # Keep track of the smallest SCO cache (in bytes)
            sco_size_bytes = sco_size_kib * 1024
            if installer.smallest_write_partition_size in [0, None] or sco_size_bytes < installer.smallest_write_partition_size:
                installer.smallest_write_partition_size = sco_size_bytes

        # Verify cache size
        local_cache_requested = self.block_cache_local is True or self.fragment_cache_local is True
        if self.cache_size_local is None and local_cache_requested:
            raise ValueError('Something went wrong trying to calculate the cache sizes')

        # Assign FD partition
        self.storagedriver_partition_file_driver = _add_junction(size=None,
                                                                 role=DiskPartition.ROLES.WRITE,
                                                                 sub_role=StorageDriverPartition.SUBROLE.FD,
                                                                 partition=installer.largest_write_partition)
        installer.created_dirs.append(self.storagedriver_partition_file_driver.path)

        # Assign DB partition (used for both the tlogs and the metadata)
        db_guid = installer.partition_info[DiskPartition.ROLES.DB][0]['guid']
        self.storagedriver_partition_tlogs = _add_junction(size=None,
                                                           role=DiskPartition.ROLES.DB,
                                                           sub_role=StorageDriverPartition.SUBROLE.TLOG,
                                                           partition=DiskPartition(db_guid))
        self.storagedriver_partition_metadata = _add_junction(size=None,
                                                              role=DiskPartition.ROLES.DB,
                                                              sub_role=StorageDriverPartition.SUBROLE.MD,
                                                              partition=DiskPartition(db_guid))
        installer.created_dirs.append(self.storagedriver_partition_tlogs.path)
        installer.created_dirs.append(self.storagedriver_partition_metadata.path)

        # Assign DTL (no sub role is specified for this junction)
        dtl_guid = installer.partition_info[DiskPartition.ROLES.DTL][0]['guid']
        self.storagedriver_partition_dtl = _add_junction(size=None,
                                                         role=DiskPartition.ROLES.DTL,
                                                         partition=DiskPartition(dtl_guid))
        installer.created_dirs.append(self.storagedriver_partition_dtl.path)
        installer.created_dirs.append(self.storagedriver.mountpoint)

        # Create the directories
        installer.root_client.dir_create(directories=installer.created_dirs)
示例#13
0
    def test_node_config_checkup(self):
        """
        Validates the cluster registry checkup in 3 consecutive scenarios:
            * Initial run: both node configs get configured
            * Second run: nothing changes
            * Error path: StorageRouter 1 is moved to another domain while its StorageDriver raises on 'server_revision'
        """
        base_structure = {'1': {'vrouter_id': '1',
                                'message_host': '10.0.1.1',
                                'message_port': 1,
                                'xmlrpc_host': '10.0.0.1',
                                'xmlrpc_port': 2,
                                'failovercache_host': '10.0.1.1',
                                'failovercache_port': 3,
                                'network_server_uri': 'tcp://10.0.1.1:4',
                                'node_distance_map': None},
                          '2': {'vrouter_id': '2',
                                'message_host': '10.0.1.2',
                                'message_port': 1,
                                'xmlrpc_host': '10.0.0.2',
                                'xmlrpc_port': 2,
                                'failovercache_host': '10.0.1.2',
                                'failovercache_port': 3,
                                'network_server_uri': 'tcp://10.0.1.2:4',
                                'node_distance_map': None}}
        compared_fields = ['vrouter_id',
                           'message_host',
                           'message_port',
                           'xmlrpc_host',
                           'xmlrpc_port',
                           'failovercache_host',
                           'failovercache_port',
                           'network_server_uri',
                           'node_distance_map']

        def _assert_node_config(_node_config, _distance_maps):
            # Compare a registered node config against the base structure completed with the expected distance map
            wanted = copy.deepcopy(base_structure[_node_config.vrouter_id])
            wanted['node_distance_map'] = _distance_maps[_node_config.vrouter_id]
            actual = dict((field, getattr(_node_config, field)) for field in compared_fields)
            self.assertDictEqual(wanted, actual)

        def _checkup_and_verify(_changes, _recordings, _distance_maps):
            # Run the checkup with cleared recordings, then verify its result, the recordings and the node configs
            StorageRouterClient.node_config_recordings = []
            outcome = StorageDriverController.cluster_registry_checkup()
            self.assertDictEqual(outcome, {vpool.guid: {'success': True,
                                                        'changes': _changes}})
            self.assertListEqual(sorted(StorageRouterClient.node_config_recordings), _recordings)
            for node_config in vpool.clusterregistry_client.get_node_configs():
                _assert_node_config(node_config, _distance_maps)

        structure = Helper.build_service_structure(
            {'vpools': [1],
             'domains': [1, 2],
             'storagerouters': [1, 2],
             'storagedrivers': [(1, 1, 1), (2, 1, 2)],  # (<id>, <vpool_id>, <storagerouter_id>)
             'storagerouter_domains': [(1, 1, 1, False), (2, 2, 1, False)]}  # (<id>, <storagerouter_id>, <domain_id>, <backup>)
        )
        vpool = structure['vpools'][1]
        storagerouters = structure['storagerouters']
        System._machine_id = {storagerouters[1].ip: '1',
                              storagerouters[2].ip: '2'}
        ArakoonInstaller.create_cluster('voldrv', ServiceType.ARAKOON_CLUSTER_TYPES.SD, storagerouters[1].ip, '/tmp')

        # Initial run, it will now be configured
        _checkup_and_verify(_changes=True,
                            _recordings=['1', '2'],
                            _distance_maps={'1': {'2': StorageDriver.DISTANCES.NEAR},
                                            '2': {'1': StorageDriver.DISTANCES.NEAR}})

        # Running it again should not change anything
        _checkup_and_verify(_changes=False,
                            _recordings=[],
                            _distance_maps={'1': {'2': StorageDriver.DISTANCES.NEAR},
                                            '2': {'1': StorageDriver.DISTANCES.NEAR}})

        # Validate some error paths
        other_domain = structure['domains'][2]
        sr_domain_junction = structure['storagerouters'][1].domains[0]
        sr_domain_junction.domain = other_domain
        sr_domain_junction.save()
        vpool_config_path = 'file://opt/OpenvStorage/config/framework.json?key=/ovs/vpools/{0}/hosts/1/config'.format(vpool.guid)
        StorageRouterClient.exceptions['server_revision'] = {vpool_config_path: Exception('ClusterNotReachableException')}
        _checkup_and_verify(_changes=True,
                            _recordings=['2'],
                            _distance_maps={'1': {'2': StorageDriver.DISTANCES.INFINITE},
                                            '2': {'1': StorageDriver.DISTANCES.INFINITE}})
示例#14
0
    def prepare_mds_service(cls, storagerouter, vpool):
        """
        Prepares an MDS service:
            * Creates the required configuration
            * Sets up the service files
        Assumes the StorageRouter and vPool are already configured with a StorageDriver and that all model-wise configurations regarding both have been completed.

        All validations and model updates happen while holding a volatile mutex named after the StorageRouter.
        When saving the StorageDriver configuration fails, the error is logged, the model objects created by this call
        are deleted again and the (deleted) junction service object is still returned; no exception is raised in that case.

        :param storagerouter: StorageRouter on which the MDS service will be created
        :type storagerouter: ovs.dal.hybrids.storagerouter.StorageRouter
        :param vpool: The vPool for which the MDS service will be created
        :type vpool: ovs.dal.hybrids.vpool.VPool
        :raises RuntimeError: vPool is not extended on StorageRouter
                              No ServiceType found for 'MetadataServer'
                              No free port is found for the new MDSService
                              No partition found on StorageRouter with DB role
                              No (or more than 1) StorageDriver found for the vPool on the StorageRouter
        :return: Newly created junction service
        :rtype: ovs.dal.hybrids.j_mdsservice.MDSService
        """
        from ovs.lib.storagedriver import StorageDriverController  # Import here to prevent from circular imports

        cls._logger.info('StorageRouter {0} - vPool {1}: Preparing MDS junction service'.format(storagerouter.name, vpool.name))

        mds_service = MDSService()
        with volatile_mutex(name='prepare_mds_{0}'.format(storagerouter.guid), wait=30):
            # VALIDATIONS
            # Verify passed StorageRouter is part of the vPool
            storagerouter.invalidate_dynamics(['vpools_guids'])
            if vpool.guid not in storagerouter.vpools_guids:
                raise RuntimeError('StorageRouter {0} is not part of vPool {1}'.format(storagerouter.name, vpool.name))

            # Verify ServiceType existence
            mds_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
            if mds_service_type is None:
                raise RuntimeError('No ServiceType found with name {0}'.format(ServiceType.SERVICE_TYPES.MD_SERVER))

            # Retrieve occupied ports for current StorageRouter and max MDSService number for current vPool/StorageRouter combo
            service_number = -1
            occupied_ports = []
            for existing_service in mds_service_type.services:
                if existing_service.storagerouter_guid == storagerouter.guid:
                    occupied_ports.extend(existing_service.ports)
                    if existing_service.mds_service.vpool_guid == vpool.guid:
                        service_number = max(existing_service.mds_service.number, service_number)

            client = SSHClient(endpoint=storagerouter)
            mds_port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|mds'.format(System.get_my_machine_id(client)))
            free_ports = System.get_free_ports(selected_range=mds_port_range,
                                               exclude=occupied_ports,
                                               amount=1,
                                               client=client)
            if len(free_ports) != 1:
                raise RuntimeError('Failed to find an available port on StorageRouter {0} within range {1}'.format(storagerouter.name, mds_port_range))

            # Partition check
            # NOTE: the break only leaves the inner loop, so when several disks hold a DB partition the last such disk wins
            db_partition = None
            for disk in storagerouter.disks:
                for partition in disk.partitions:
                    if DiskPartition.ROLES.DB in partition.roles:
                        db_partition = partition
                        break
            if db_partition is None:
                raise RuntimeError('Could not find DB partition on StorageRouter {0}'.format(storagerouter.name))

            # Verify StorageDriver configured
            storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
            if len(storagedrivers) != 1:
                raise RuntimeError('Expected to find a configured StorageDriver for vPool {0} on StorageRouter {1}'.format(vpool.name, storagerouter.name))

            # MODEL UPDATES
            # Service and MDS service
            service_number += 1
            cls._logger.info('StorageRouter {0} - vPool {1}: Adding junction service with number {2}'.format(storagerouter.name, vpool.name, service_number))

            service = Service()
            service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
            service.type = mds_service_type
            service.ports = free_ports
            service.storagerouter = storagerouter
            service.save()
            mds_service.vpool = vpool
            mds_service.number = service_number
            mds_service.service = service
            mds_service.save()

            # StorageDriver partitions
            cls._logger.info('StorageRouter {0} - vPool {1}: Adding StorageDriverPartition on partition with mount point {2}'.format(storagerouter.name, vpool.name, db_partition.mountpoint))
            storagedriver = storagedrivers[0]
            sdp = StorageDriverController.add_storagedriverpartition(storagedriver, {'size': None,
                                                                                     'role': DiskPartition.ROLES.DB,
                                                                                     'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                     'partition': db_partition,
                                                                                     'mds_service': mds_service})

            # CONFIGURATIONS
            # Volumedriver
            mds_nodes = []
            for sd_partition in storagedriver.partitions:
                if sd_partition.role == DiskPartition.ROLES.DB and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS and sd_partition.mds_service is not None:
                    # Dedicated name on purpose: rebinding 'service' here would make the rollback below delete
                    # the Service of whichever partition was iterated last instead of the one created above
                    node_service = sd_partition.mds_service.service
                    mds_nodes.append({'host': node_service.storagerouter.ip,
                                      'port': node_service.ports[0],
                                      'db_directory': '{0}/db'.format(sd_partition.path),
                                      'scratch_directory': '{0}/scratch'.format(sd_partition.path)})

            cls._logger.info('StorageRouter {0} - vPool {1}: Configuring StorageDriver with MDS nodes: {2}'.format(storagerouter.name, vpool.name, mds_nodes))
            # Generate the correct section in the StorageDriver's configuration
            try:
                storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
                storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
                storagedriver_config.save(client)
            except Exception:
                cls._logger.exception('StorageRouter {0} - vPool {1}: Configuring StorageDriver failed. Reverting model changes'.format(storagerouter.name, vpool.name))
                # Clean up model changes if error occurs
                sdp.delete()
                mds_service.delete()  # Must be removed before the service
                service.delete()
        return mds_service
示例#15
0
    def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
        """
        Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files

        Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
        configuration regarding both is completed.
        :param storagerouter: Storagerouter on which MDS service will be created
        :param vpool:         The vPool for which the MDS service will be created
        :param fresh_only:    If True and 1 or more MDS services already exist for this vpool on this storagerouter,
                              nothing is created and None is returned
        :param reload_config: Passed on when saving the volumedriver's configuration. If True, the updated configuration will be reloaded
        :raises RuntimeError: No ServiceType found for 'MetadataServer'
                              No free port is found for the new MDS service
                              No partition found on the storage router with DB role
                              No storagedriver found for the vpool on the storage router
        :return: The newly created MDS service, or None when fresh_only prevented the creation
        """
        # Fetch service sequence number based on MDS services for current vPool and current storage router
        service_number = -1
        for existing_mds_service in vpool.mds_services:
            if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
                service_number = max(existing_mds_service.number, service_number)

        if fresh_only is True and service_number >= 0:
            return  # There is already 1 or more MDS services running, aborting

        # VALIDATIONS
        # 1. Find free port based on MDS services for all vPools on current storage router
        client = SSHClient(storagerouter)
        mdsservice_type = ServiceTypeList.get_by_name('MetadataServer')
        if mdsservice_type is None:
            # Fail with a clear message instead of an AttributeError on '.services' below
            raise RuntimeError('No ServiceType found with name {0}'.format('MetadataServer'))
        occupied_ports = []
        for existing_service in mdsservice_type.services:
            if existing_service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(existing_service.ports)

        mds_port_range = client.config_read('ovs.ports.mds')
        free_ports = System.get_free_ports(selected_range=mds_port_range,
                                           exclude=occupied_ports,
                                           nr=1,
                                           client=client)
        if not free_ports:
            raise RuntimeError('Failed to find an available port on storage router {0} within range {1}'.format(storagerouter.name, mds_port_range))

        # 2. Partition check
        # NOTE: the break only leaves the inner loop, so when several disks hold a DB partition the last such disk wins
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError('Could not find DB partition on storage router {0}'.format(storagerouter.name))

        # 3. Verify storage driver configured
        storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
        if not storagedrivers:
            raise RuntimeError('Expected to find a configured storagedriver for vpool {0} on storage router {1}'.format(vpool.name, storagerouter.name))

        # MODEL UPDATES
        # 1. Service
        service_number += 1
        service = Service()
        service.name = 'metadataserver_{0}_{1}'.format(vpool.name, service_number)
        service.type = mdsservice_type
        service.ports = [free_ports[0]]
        service.storagerouter = storagerouter
        service.save()
        mds_service = MDSService()
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # 2. Storage driver partitions
        from ovs.lib.storagedriver import StorageDriverController
        sdp = StorageDriverController.add_storagedriverpartition(storagedrivers[0], {'size': None,
                                                                                     'role': DiskPartition.ROLES.DB,
                                                                                     'sub_role': StorageDriverPartition.SUBROLE.MDS,
                                                                                     'partition': db_partition,
                                                                                     'mds_service': mds_service})

        # CONFIGURATIONS
        # 1. Volumedriver
        # The loop uses its own names: rebinding 'mds_service' here would make this function return whichever
        # junction was iterated last instead of the one created above
        # NOTE(review): every node is given the path of the partition created by this call - confirm that this is
        # intended when older MDS services for this vPool already exist on the storage router
        mds_nodes = []
        for node_service in mdsservice_type.services:
            if node_service.storagerouter_guid != storagerouter.guid:
                continue
            node_mds_service = node_service.mds_service
            if node_mds_service is not None and node_mds_service.vpool_guid == vpool.guid:
                mds_nodes.append({'host': node_service.storagerouter.ip,
                                  'port': node_service.ports[0],
                                  'db_directory': sdp.path,
                                  'scratch_directory': sdp.path})

        # Generate the correct section in the Storage Driver's configuration
        storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.name)
        storagedriver_config.load(client)
        storagedriver_config.clean()  # Clean out obsolete values
        storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
        storagedriver_config.save(client, reload_config=reload_config)

        return mds_service
示例#16
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster

        The removal runs in 3 phases: validations, confirmations (skipped when silent == '--force-yes') and the
        actual removal. A failure during the validations or the removal is logged and ends the process with exit code 1.

        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.storagerouter import StorageRouterController
        from ovs.dal.lists.storagerouterlist import StorageRouterList

        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove node", boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages="WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n",
        )

        ###############
        # VALIDATIONS #
        ###############
        try:
            # The type check has to come first: strip() on a non-string raises an unrelated AttributeError
            if not isinstance(node_ip, str):
                raise ValueError("Node IP must be a string")
            node_ip = node_ip.strip()
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError("Invalid IP {0} specified".format(node_ip))

            storage_router_all = StorageRouterList.get_storagerouters()
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set([storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([storage_router.ip for storage_router in storage_router_masters])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)

            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    "Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}".format(
                        "\n - ".join(storage_router_all_ips), node_ip
                    )
                )

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(storage_router_master_ips) == 1:
                raise RuntimeError("Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    "The node to be removed cannot be identical to the node on which the removal is initiated"
                )

            Toolbox.log(
                logger=NodeRemovalController._logger, messages="Creating SSH connections to remaining master nodes"
            )
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router, username="******")
                    if client.run(["pwd"]):
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages="  Node with IP {0:<15} successfully connected to".format(storage_router.ip),
                        )
                        ip_client_map[storage_router.ip] = client
                        # Remember a reachable master other than the node being removed
                        if storage_router != storage_router_to_remove and storage_router.node_type == "MASTER":
                            master_ip = storage_router.ip
                except UnableToConnectException:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages="  Node with IP {0:<15} is unreachable".format(storage_router.ip),
                    )
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError("Could not connect to any master node in the cluster")

            storage_router_to_remove.invalidate_dynamics("vdisks_guids")
            if (
                len(storage_router_to_remove.vdisks_guids) > 0
            ):  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError("Still vDisks attached to Storage Router {0}".format(storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(service="memcached")
            internal_rabbit_mq = Toolbox.is_service_internally_managed(service="rabbitmq")
            memcached_endpoints = Configuration.get(key="/ovs/framework/memcache|endpoints")
            rabbit_mq_endpoints = Configuration.get(key="/ovs/framework/messagequeue|endpoints")
            # Match on the complete host part: a bare prefix match would also hit e.g. 10.0.0.10 when removing 10.0.0.1
            # NOTE(review): assumes endpoints are formatted as '<ip>' or '<ip>:<port>' - confirm against the configuration
            removed_ip = storage_router_to_remove.ip
            removed_ip_prefix = "{0}:".format(removed_ip)
            copy_memcached_endpoints = [
                endpoint
                for endpoint in memcached_endpoints
                if endpoint != removed_ip and not endpoint.startswith(removed_ip_prefix)
            ]
            copy_rabbit_mq_endpoints = [
                endpoint
                for endpoint in rabbit_mq_endpoints
                if endpoint != removed_ip and not endpoint.startswith(removed_ip_prefix)
            ]
            if len(copy_memcached_endpoints) == 0 and internal_memcached is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the memcached service"
                )
            if len(copy_rabbit_mq_endpoints) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    "Removal of provided nodes will result in a complete removal of the messagequeue service"
                )
        except Exception as exception:
            Toolbox.log(
                logger=NodeRemovalController._logger, messages=[str(exception)], boxed=True, loglevel="exception"
            )
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        interactive = silent != "--force-yes"
        remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
        if interactive is True:
            proceed = Interactive.ask_yesno(
                message="Are you sure you want to remove node {0}?".format(storage_router_to_remove.name),
                default_value=False,
            )
            if proceed is False:
                Toolbox.log(logger=NodeRemovalController._logger, messages="Abort removal", title=True)
                sys.exit(1)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if ServiceManager.has_service(name="asd-manager", client=client):
                    remove_asd_manager = Interactive.ask_yesno(
                        message="Do you also want to remove the ASD manager and related ASDs?", default_value=False
                    )

            if remove_asd_manager is True or storage_router_to_remove_online is False:
                # Registered hooks can request an extra confirmation; declining one cancels the ASD manager removal
                for function in Toolbox.fetch_hooks("setup", "validate_asd_removal"):
                    validation_output = function(storage_router_to_remove.ip)
                    if validation_output["confirm"] is True:
                        if Interactive.ask_yesno(message=validation_output["question"], default_value=False) is False:
                            remove_asd_manager = False
                            break

        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="Starting removal of node {0} - {1}".format(
                    storage_router_to_remove.name, storage_router_to_remove.ip
                ),
            )
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="  Marking all Storage Drivers served by Storage Router {0} as offline".format(
                        storage_router_to_remove.ip
                    ),
                )
                StorageDriverController.mark_offline(storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="  Removing vPools from node",
            )
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages="    Removing vPool {0} from node".format(storage_driver.vpool.name),
                )
                StorageRouterController.remove_storagedriver(
                    storagedriver_guid=storage_driver.guid, offline_storage_router_guids=storage_routers_offline_guids
                )

            # Demote if MASTER
            if storage_router_to_remove.node_type == "MASTER":
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline,
                )

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger, messages="Stopping and removing services")
            config_store = Configuration.get_store()
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger,
                )
                service = "watcher-config"
                if ServiceManager.has_service(service, client=client):
                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing service {0}".format(service))
                    ServiceManager.stop_service(service, client=client)
                    ServiceManager.remove_service(service, client=client)

                if config_store == "etcd":
                    from ovs.extensions.db.etcd.installer import EtcdInstaller

                    if Configuration.get(key="/ovs/framework/external_config") is None:
                        Toolbox.log(logger=NodeRemovalController._logger, messages="      Removing Etcd cluster")
                        try:
                            EtcdInstaller.stop("config", client)
                            EtcdInstaller.remove("config", client)
                        except Exception as ex:
                            # Best effort: a failing Etcd cleanup is logged but does not abort the node removal
                            Toolbox.log(
                                logger=NodeRemovalController._logger,
                                messages=["\nFailed to unconfigure Etcd", ex],
                                loglevel="exception",
                            )

                    Toolbox.log(logger=NodeRemovalController._logger, messages="Removing Etcd proxy")
                    EtcdInstaller.remove_proxy("config", client.ip)

            Toolbox.run_hooks(
                component="noderemoval",
                sub_component="remove",
                logger=NodeRemovalController._logger,
                cluster_ip=storage_router_to_remove.ip,
                complete_removal=remove_asd_manager,
            )

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger, messages="Removing node from model")
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete("/ovs/framework/hosts/{0}".format(storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger,
            )

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove, username="******")
                if config_store == "arakoon":
                    client.file_delete(filenames=[ArakoonConfiguration.CACC_LOCATION])
                client.file_delete(filenames=[Configuration.BOOTSTRAP_CONFIG_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger, messages="Successfully removed node\n")
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=["An unexpected error occurred:", str(exception)],
                boxed=True,
                loglevel="exception",
            )
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\n")
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages="This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.",
                boxed=True,
                loglevel="error",
            )
            sys.exit(1)

        # NOTE(review): in non-interactive mode this also runs when the node was found unreachable - confirm intended
        if remove_asd_manager is True:
            Toolbox.log(logger=NodeRemovalController._logger, messages="\nRemoving ASD Manager")
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system("asd-manager remove --force-yes")
        Toolbox.log(logger=NodeRemovalController._logger, messages="Remove nodes finished", title=True)
示例#17
0
    def prepare_mds_service(storagerouter, vpool, fresh_only, reload_config):
        """
        Prepares an MDS service:
        * Creates the required configuration
        * Sets up the service files

        Assumes the StorageRouter and VPool are already configured with a StorageDriver and that all model-wise
        configuration regarding both is completed.
        :param storagerouter: Storagerouter on which MDS service will be created
        :type storagerouter: StorageRouter

        :param vpool: The vPool for which the MDS service will be created
        :type vpool: VPool

        :param fresh_only: If True and 1 or more MDS services already exist for this vpool on this storagerouter, nothing is created and None is returned
        :type fresh_only: bool

        :param reload_config: If True, the volumedriver's updated configuration will be reloaded
        :type reload_config: bool

        :raises RuntimeError: No free port is found for the new MDS service
                              No partition found on the storage router with DB role
                              No storagedriver found for the vpool on the storage router

        :return: Newly created service, or None when fresh_only prevented the creation
        :rtype: MDSService
        """
        # Fetch service sequence number based on MDS services for current vPool and current storage router
        service_number = -1
        for existing_mds_service in vpool.mds_services:
            if existing_mds_service.service.storagerouter_guid == storagerouter.guid:
                service_number = max(existing_mds_service.number, service_number)

        if fresh_only is True and service_number >= 0:
            return  # There is already 1 or more MDS services running, aborting

        # VALIDATIONS
        # 1. Find free port based on MDS services for all vPools on current storage router
        client = SSHClient(storagerouter)
        mdsservice_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER)
        occupied_ports = []
        for existing_service in mdsservice_type.services:
            if existing_service.storagerouter_guid == storagerouter.guid:
                occupied_ports.extend(existing_service.ports)

        mds_port_range = Configuration.get(
            "/ovs/framework/hosts/{0}/ports|mds".format(System.get_my_machine_id(client))
        )
        free_ports = System.get_free_ports(selected_range=mds_port_range, exclude=occupied_ports, nr=1, client=client)
        if not free_ports:
            raise RuntimeError(
                "Failed to find an available port on storage router {0} within range {1}".format(
                    storagerouter.name, mds_port_range
                )
            )

        # 2. Partition check
        # NOTE: the break only leaves the inner loop, so when several disks hold a DB partition the last such disk wins
        db_partition = None
        for disk in storagerouter.disks:
            for partition in disk.partitions:
                if DiskPartition.ROLES.DB in partition.roles:
                    db_partition = partition
                    break
        if db_partition is None:
            raise RuntimeError("Could not find DB partition on storage router {0}".format(storagerouter.name))

        # 3. Verify storage driver configured
        storagedrivers = [sd for sd in vpool.storagedrivers if sd.storagerouter_guid == storagerouter.guid]
        if not storagedrivers:
            raise RuntimeError(
                "Expected to find a configured storagedriver for vpool {0} on storage router {1}".format(
                    vpool.name, storagerouter.name
                )
            )
        storagedriver = storagedrivers[0]

        # MODEL UPDATES
        # 1. Service
        service_number += 1
        service = Service()
        service.name = "metadataserver_{0}_{1}".format(vpool.name, service_number)
        service.type = mdsservice_type
        service.ports = [free_ports[0]]
        service.storagerouter = storagerouter
        service.save()
        mds_service = MDSService()
        mds_service.vpool = vpool
        mds_service.number = service_number
        mds_service.service = service
        mds_service.save()

        # 2. Storage driver partitions
        from ovs.lib.storagedriver import StorageDriverController

        StorageDriverController.add_storagedriverpartition(
            storagedriver,
            {
                "size": None,
                "role": DiskPartition.ROLES.DB,
                "sub_role": StorageDriverPartition.SUBROLE.MDS,
                "partition": db_partition,
                "mds_service": mds_service,
            },
        )

        # CONFIGURATIONS
        # 1. Volumedriver
        # The loop uses its own names: rebinding 'mds_service' here would make this function return whichever
        # junction (possibly None) was iterated last instead of the one created above
        mds_nodes = []
        for node_service in mdsservice_type.services:
            if node_service.storagerouter_guid != storagerouter.guid:
                continue
            node_mds_service = node_service.mds_service
            if node_mds_service is None or node_mds_service.vpool_guid != vpool.guid:
                continue
            # Each node is configured with the path of its own DB/MDS StorageDriverPartition
            node_partition = [
                sd_partition
                for sd_partition in node_mds_service.storagedriver_partitions
                if sd_partition.role == DiskPartition.ROLES.DB
                and sd_partition.sub_role == StorageDriverPartition.SUBROLE.MDS
            ][0]
            mds_nodes.append(
                {
                    "host": node_service.storagerouter.ip,
                    "port": node_service.ports[0],
                    "db_directory": node_partition.path,
                    "scratch_directory": node_partition.path,
                }
            )

        # Generate the correct section in the Storage Driver's configuration
        storagedriver_config = StorageDriverConfiguration("storagedriver", vpool.guid, storagedriver.storagedriver_id)
        storagedriver_config.load()
        storagedriver_config.configure_metadata_server(mds_nodes=mds_nodes)
        storagedriver_config.save(client, reload_config=reload_config)

        return mds_service
示例#18
0
    def add_vpool(cls, parameters):
        """
        Add a vPool to the machine this task is running on
        The vPool is either newly created or, when it already exists, extended to the requested StorageRouter
        Keys read from the parameters:
            * storagerouter_ip, vpool_name, storage_ip, caching_info, config_params
            * backend_info, backend_info_bc, backend_info_fc
            * connection_info, connection_info_bc, connection_info_fc
            * mds_config_params (defaults to {}), writecache_size (defaults to 0), parallelism (proxies defaults to 2)
        :param parameters: Parameters for vPool creation
        :type parameters: dict
        :raises ValueError: When the parameters are not passed as a dict
        :raises RuntimeError: When the StorageRouter cannot be found or reached, or when the Arakoon checkup for the StorageDriver cluster did not succeed within 300 attempts
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        cls._logger.debug('Adding vpool. Parameters: {}'.format(parameters))
        # VALIDATIONS
        if not isinstance(parameters, dict):
            raise ValueError(
                'Parameters passed to create a vPool should be of type dict')

        # Check StorageRouter existence
        storagerouter = StorageRouterList.get_by_ip(
            ip=parameters.get('storagerouter_ip'))
        if storagerouter is None:
            raise RuntimeError('Could not find StorageRouter')

        # Validate requested vPool configurations
        vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
        vp_installer.validate(storagerouter=storagerouter)

        # Validate requested StorageDriver configurations
        cls._logger.info(
            'vPool {0}: Validating StorageDriver configurations'.format(
                vp_installer.name))
        sd_installer = StorageDriverInstaller(
            vp_installer=vp_installer,
            configurations={
                'storage_ip': parameters.get('storage_ip'),
                'caching_info': parameters.get('caching_info'),
                'backend_info': {
                    'main':
                    parameters.get('backend_info'),
                    StorageDriverConfiguration.CACHE_BLOCK:
                    parameters.get('backend_info_bc'),
                    StorageDriverConfiguration.CACHE_FRAGMENT:
                    parameters.get('backend_info_fc')
                },
                'connection_info': {
                    'main':
                    parameters.get('connection_info'),
                    StorageDriverConfiguration.CACHE_BLOCK:
                    parameters.get('connection_info_bc'),
                    StorageDriverConfiguration.CACHE_FRAGMENT:
                    parameters.get('connection_info_fc')
                },
                'sd_configuration': parameters.get('config_params')
            })

        # The mutex is keyed on the StorageRouter so that concurrent add_vpool jobs for the same StorageRouter serialize on the partition section below
        partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(
            storagerouter.guid))
        try:
            # VPOOL CREATION
            # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
            if vp_installer.is_new is True:
                vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
                vp_installer.configure_mds(
                    config=parameters.get('mds_config_params', {}))
            else:
                vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

            # ADDITIONAL VALIDATIONS
            # Check StorageRouter connectivity
            cls._logger.info(
                'vPool {0}: Validating StorageRouter connectivity'.format(
                    vp_installer.name))
            linked_storagerouters = [storagerouter]
            if vp_installer.is_new is False:
                linked_storagerouters += [
                    sd.storagerouter
                    for sd in vp_installer.vpool.storagedrivers
                ]

            sr_client_map = SSHClient.get_clients(
                endpoints=linked_storagerouters, user_names=['ovs', 'root'])
            # The 'offline' entry is taken out of the map, so only StorageRouters with clients remain as keys
            offline_nodes = sr_client_map.pop('offline')
            if storagerouter in offline_nodes:
                raise RuntimeError(
                    'Node on which the vPool is being {0} is not reachable'.
                    format('created'
                           if vp_installer.is_new is True else 'extended'))

            sr_installer = StorageRouterInstaller(
                root_client=sr_client_map[storagerouter]['root'],
                sd_installer=sd_installer,
                vp_installer=vp_installer,
                storagerouter=storagerouter)

            # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
            partitions_mutex.acquire(wait=60)
            sr_installer.partition_info = StorageRouterController.get_partition_info(
                storagerouter_guid=storagerouter.guid)
            sr_installer.validate_vpool_extendable()
            sr_installer.validate_global_write_buffer(
                requested_size=parameters.get('writecache_size', 0))
            sr_installer.validate_local_cache_size(
                requested_proxies=parameters.get('parallelism', {}).get(
                    'proxies', 2))

            # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
            sd_installer.create()
            sd_installer.create_partitions()
            partitions_mutex.release()

            vp_installer.refresh_metadata()
        except Exception:
            cls._logger.exception(
                'Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'
                .format(vp_installer.name, storagerouter.name))
            # Release on every failure path, the mutex might still be held when the partition section raised
            partitions_mutex.release()
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise

        # Arakoon setup
        # Retry once per second, up to 300 attempts, until the checkup reports True
        counter = 0
        while counter < 300:
            try:
                if StorageDriverController.manual_voldrv_arakoon_checkup(
                ) is True:
                    break
            except Exception:
                cls._logger.exception(
                    'Arakoon checkup for voldrv cluster failed')
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise
            counter += 1
            time.sleep(1)
            if counter == 300:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise RuntimeError(
                    'Arakoon checkup for the StorageDriver cluster could not be started'
                )

        # Cluster registry
        try:
            vp_installer.configure_cluster_registry(allow_raise=True)
        except Exception:
            if vp_installer.is_new is True:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            else:
                vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
            raise

        try:
            sd_installer.setup_proxy_configs()
            sd_installer.configure_storagedriver_service()
            DiskController.sync_with_reality(storagerouter.guid)
            MDSServiceController.prepare_mds_service(
                storagerouter=storagerouter, vpool=vp_installer.vpool)

            # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
            vp_installer.vpool.invalidate_dynamics('configuration')
            if vp_installer.mds_safety is not None and vp_installer.vpool.configuration[
                    'mds_config']['mds_safety'] != vp_installer.mds_safety:
                Configuration.set(
                    key='/ovs/vpools/{0}/mds_config|mds_safety'.format(
                        vp_installer.vpool.guid),
                    value=vp_installer.mds_safety)

            sd_installer.start_services(
            )  # Create and start watcher volumedriver, DTL, proxies and StorageDriver services

            # Post creation/extension checkups
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(
                vpool=vp_installer.vpool, offline_nodes=offline_nodes)
            for sr, clients in sr_client_map.iteritems():
                for current_storagedriver in [
                        sd for sd in sr.storagedrivers
                        if sd.vpool_guid == vp_installer.vpool.guid
                ]:
                    storagedriver_config = StorageDriverConfiguration(
                        vpool_guid=vp_installer.vpool.guid,
                        storagedriver_id=current_storagedriver.storagedriver_id
                    )
                    if storagedriver_config.config_missing is False:
                        # Filesystem section in StorageDriver configuration are all parameters used for vDisks created directly on the filesystem
                        # So when a vDisk gets created on the filesystem, these MDSes will be assigned to them
                        storagedriver_config.configure_filesystem(
                            fs_metadata_backend_mds_nodes=mds_config_set[
                                sr.guid])
                        storagedriver_config.save(client=clients['ovs'])

            # Everything's reconfigured, refresh new cluster configuration
            for current_storagedriver in vp_installer.vpool.storagedrivers:
                # StorageRouters without clients were reported offline above, so they are skipped
                if current_storagedriver.storagerouter not in sr_client_map:
                    continue
                vp_installer.vpool.storagedriver_client.update_cluster_node_configs(
                    str(current_storagedriver.storagedriver_id),
                    req_timeout_secs=10)
        except Exception:
            cls._logger.exception('vPool {0}: Creation failed'.format(
                vp_installer.name))
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise

        # When a node is offline, we can run into errors, but also when 1 or more volumes are not running
        # Scheduled tasks below, so don't really care whether they succeed or not
        # Failures are logged rather than silently dropped; only Exception is caught so that an interrupt or interpreter exit still propagates
        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid,
                                        ensure_single_timeout=600)
        except Exception:
            cls._logger.exception(
                'vPool {0}: DTL checkup failed, continuing'.format(
                    vp_installer.name))
        for vdisk in vp_installer.vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
            except Exception:
                cls._logger.exception(
                    'vPool {0}: Ensuring MDS safety failed for vDisk {1}, continuing'
                    .format(vp_installer.name, vdisk.guid))
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info('Add vPool {0} ended successfully'.format(
            vp_installer.name))
示例#19
0
    def promote_or_demote_node(node_action, cluster_ip=None, execute_rollback=False):
        """
        Promotes or demotes a node
        When demoting with a cluster_ip specified, the node with that IP is expected to be unreachable over SSH
        In every other case the action is applied through a connection to 127.0.0.1
        Any failure after the initial argument check is logged and ends the process with exit code 1
        :param node_action: Demote or promote
        :type node_action: str
        :param cluster_ip: IP of node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: In case of failure revert the changes made
        :type execute_rollback: bool
        :raises ValueError: When node_action is neither 'promote' nor 'demote'
        :return: None
        """
        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        log = NodeTypeController._logger
        Toolbox.log(logger=log, messages='Open vStorage Setup - {0}'.format(node_action.capitalize()), boxed=True)
        try:
            Toolbox.log(logger=log, messages='Collecting information', title=True)

            # The local node must have finished its setup before anything can be changed
            machine_id = System.get_my_machine_id()
            setup_completed = Configuration.get('/ovs/framework/hosts/{0}/setupcompleted'.format(machine_id))
            if setup_completed is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip:
                if not re.match(Toolbox.regex_ip, cluster_ip):
                    raise RuntimeError('Incorrect IP provided ({0})'.format(cluster_ip))
                # From here on the machine ID of the specified node is used instead of the local one
                ssh_client = SSHClient(endpoint=cluster_ip)
                machine_id = System.get_my_machine_id(ssh_client)

            # Refuse actions which do not match the currently configured node type
            node_type = Configuration.get('/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_action == 'promote' and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            if node_action == 'demote' and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')
            if node_type not in ['MASTER', 'EXTRA']:
                raise RuntimeError('This node is not correctly configured.')

            master_ip = None
            offline_nodes = []
            target_client = None

            if node_action == 'demote' and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                target_reachable = True
                for candidate in StorageRouterList.get_storagerouters():
                    try:
                        ssh_client = SSHClient(candidate.ip, username='******')
                        if candidate.node_type == 'MASTER':
                            master_ip = candidate.ip
                        ip_client_map[candidate.ip] = ssh_client
                    except UnableToConnectException:
                        if candidate.ip == cluster_ip:
                            # The node to demote is confirmed unreachable
                            target_reachable = False
                            unique_id = candidate.machine_id
                            StorageDriverController.mark_offline(storagerouter_guid=candidate.guid)
                        offline_nodes.append(candidate)
                if target_reachable is True:
                    raise RuntimeError("If the node is online, please use 'ovs setup demote' executed on the node you wish to demote")
                if master_ip is None:
                    raise RuntimeError('Failed to retrieve another responsive MASTER node')
            else:
                target_password = Toolbox.ask_validate_password(ip='127.0.0.1', logger=log)
                target_client = SSHClient('127.0.0.1', username='******', password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(ip=target_client.ip, password=target_password)
                # Collect all node IPs and the IPs of the masters other than this node
                node_ips = []
                master_node_ips = []
                for sr_info in storagerouter_info.itervalues():
                    node_ips.append(sr_info['ip'])
                    if sr_info['type'] == 'master' and sr_info['ip'] != ip:
                        master_node_ips.append(sr_info['ip'])
                if not master_node_ips:
                    if node_action == 'promote':
                        raise RuntimeError('No master node could be found')
                    raise RuntimeError('It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = {}
                for node_ip in node_ips:
                    ip_client_map[node_ip] = SSHClient(node_ip, username='******')

            if node_action == 'demote':
                # An internally managed Arakoon cluster consisting of only this node blocks the demote
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_name, False)
                    config.load_config()
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    single_node_on_target = len(config.nodes) == 1 and config.nodes[0].ip == ip
                    if single_node_on_target and metadata.get('internal') is True:
                        raise RuntimeError('Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.')

            configure_rabbitmq = Toolbox.is_service_internally_managed(service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(service='memcached')
            promote_kwargs = {'cluster_ip': ip,
                              'master_ip': master_ip,
                              'ip_client_map': ip_client_map,
                              'unique_id': unique_id,
                              'configure_memcached': configure_memcached,
                              'configure_rabbitmq': configure_rabbitmq}
            demote_kwargs = {'cluster_ip': ip,
                             'master_ip': master_ip,
                             'ip_client_map': ip_client_map,
                             'unique_id': unique_id,
                             'unconfigure_memcached': configure_memcached,
                             'unconfigure_rabbitmq': configure_rabbitmq,
                             'offline_nodes': offline_nodes}

            promoting = node_action == 'promote'
            try:
                if promoting:
                    NodeTypeController.promote_node(**promote_kwargs)
                else:
                    NodeTypeController.demote_node(**demote_kwargs)
            except Exception:
                if execute_rollback is True:
                    # Revert by executing the opposite action
                    if promoting:
                        NodeTypeController.demote_node(**demote_kwargs)
                    else:
                        NodeTypeController.promote_node(**promote_kwargs)
                elif target_client is not None:
                    # No rollback executed: write the name of the opposite action to a marker file on the target
                    target_client.file_write('/tmp/ovs_rollback', 'demote' if promoting else 'promote')
                raise

            Toolbox.log(logger=log, messages='\n')
            Toolbox.log(logger=log, messages='{0} complete.'.format(node_action.capitalize()), boxed=True)
        except Exception as exception:
            Toolbox.log(logger=log, messages='\n')
            Toolbox.log(logger=log, messages=['An unexpected error occurred:', str(exception)], boxed=True, loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=log, messages='\n')
            Toolbox.log(logger=log,
                        messages='This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                        boxed=True,
                        loglevel='error')
            sys.exit(1)
示例#20
0
    def remove_node(node_ip, silent=None):
        """
        Remove the node with specified IP from the cluster
        :param node_ip: IP of the node to remove
        :type node_ip: str
        :param silent: If silent == '--force-yes' no question will be asked to confirm the removal
        :type silent: str
        :return: None
        """
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.lib.storagedriver import StorageDriverController
        from ovs.lib.vpool import VPoolController

        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove node',
                    boxed=True)
        Toolbox.log(
            logger=NodeRemovalController._logger,
            messages=
            'WARNING: Some of these steps may take a very long time, please check the logs for more information\n\n'
        )
        service_manager = ServiceFactory.get_manager()

        ###############
        # VALIDATIONS #
        ###############
        try:
            node_ip = node_ip.strip()
            if not isinstance(node_ip, str):
                raise ValueError('Node IP must be a string')
            if not re.match(SSHClient.IP_REGEX, node_ip):
                raise ValueError('Invalid IP {0} specified'.format(node_ip))

            storage_router_all = sorted(StorageRouterList.get_storagerouters(),
                                        key=lambda k: k.name)
            storage_router_masters = StorageRouterList.get_masters()
            storage_router_all_ips = set(
                [storage_router.ip for storage_router in storage_router_all])
            storage_router_master_ips = set([
                storage_router.ip for storage_router in storage_router_masters
            ])
            storage_router_to_remove = StorageRouterList.get_by_ip(node_ip)
            offline_reasons = {}
            if node_ip not in storage_router_all_ips:
                raise ValueError(
                    'Unknown IP specified\nKnown in model:\n - {0}\nSpecified for removal:\n - {1}'
                    .format('\n - '.join(storage_router_all_ips), node_ip))

            if len(storage_router_all_ips) == 1:
                raise RuntimeError("Removing the only node is not possible")

            if node_ip in storage_router_master_ips and len(
                    storage_router_master_ips) == 1:
                raise RuntimeError(
                    "Removing the only master node is not possible")

            if System.get_my_storagerouter() == storage_router_to_remove:
                raise RuntimeError(
                    'The node to be removed cannot be identical to the node on which the removal is initiated'
                )

            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages='Creating SSH connections to remaining master nodes')
            master_ip = None
            ip_client_map = {}
            storage_routers_offline = []
            storage_router_to_remove_online = True
            for storage_router in storage_router_all:
                try:
                    client = SSHClient(storage_router,
                                       username='******',
                                       timeout=10)
                except (UnableToConnectException, NotAuthenticatedException,
                        TimeOutException) as ex:
                    if isinstance(ex, UnableToConnectException):
                        msg = 'Unable to connect'
                    elif isinstance(ex, NotAuthenticatedException):
                        msg = 'Could not authenticate'
                    elif isinstance(ex, TimeOutException):
                        msg = 'Connection timed out'
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='  * Node with IP {0:<15}- {1}'.format(
                            storage_router.ip, msg))
                    offline_reasons[storage_router.ip] = msg
                    storage_routers_offline.append(storage_router)
                    if storage_router == storage_router_to_remove:
                        storage_router_to_remove_online = False
                    continue

                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages='  * Node with IP {0:<15}- Successfully connected'
                    .format(storage_router.ip))
                ip_client_map[storage_router.ip] = client
                if storage_router != storage_router_to_remove and storage_router.node_type == 'MASTER':
                    master_ip = storage_router.ip

            if len(ip_client_map) == 0 or master_ip is None:
                raise RuntimeError(
                    'Could not connect to any master node in the cluster')

            storage_router_to_remove.invalidate_dynamics('vdisks_guids')
            if len(
                    storage_router_to_remove.vdisks_guids
            ) > 0:  # vDisks are supposed to be moved away manually before removing a node
                raise RuntimeError(
                    "Still vDisks attached to Storage Router {0}".format(
                        storage_router_to_remove.name))

            internal_memcached = Toolbox.is_service_internally_managed(
                service='memcached')
            internal_rabbit_mq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            memcached_endpoints = Configuration.get(
                key='/ovs/framework/memcache|endpoints')
            rabbit_mq_endpoints = Configuration.get(
                key='/ovs/framework/messagequeue|endpoints')
            copy_memcached_endpoints = list(memcached_endpoints)
            copy_rabbit_mq_endpoints = list(rabbit_mq_endpoints)
            for endpoint in memcached_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_memcached_endpoints.remove(endpoint)
            for endpoint in rabbit_mq_endpoints:
                if endpoint.startswith(storage_router_to_remove.ip):
                    copy_rabbit_mq_endpoints.remove(endpoint)
            if len(copy_memcached_endpoints
                   ) == 0 and internal_memcached is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the memcached service'
                )
            if len(copy_rabbit_mq_endpoints
                   ) == 0 and internal_rabbit_mq is True:
                raise RuntimeError(
                    'Removal of provided nodes will result in a complete removal of the messagequeue service'
                )

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='validate_removal',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the validation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)

        #################
        # CONFIRMATIONS #
        #################
        try:
            interactive = silent != '--force-yes'
            remove_asd_manager = not interactive  # Remove ASD manager if non-interactive else ask
            if interactive is True:
                if len(storage_routers_offline) > 0:
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages=
                        'Certain nodes appear to be offline. These will not fully removed and will cause issues if they are not really offline.'
                    )
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Offline nodes: {0}'.format(''.join(
                            ('\n  * {0:<15}- {1}.'.format(ip, message)
                             for ip, message in offline_reasons.iteritems()))))
                    valid_node_info = Interactive.ask_yesno(
                        message=
                        'Continue the removal with these being presumably offline?',
                        default_value=False)
                    if valid_node_info is False:
                        Toolbox.log(
                            logger=NodeRemovalController._logger,
                            messages=
                            'Please validate the state of the nodes before removing.',
                            title=True)
                        sys.exit(1)
                proceed = Interactive.ask_yesno(
                    message='Are you sure you want to remove node {0}?'.format(
                        storage_router_to_remove.name),
                    default_value=False)
                if proceed is False:
                    Toolbox.log(logger=NodeRemovalController._logger,
                                messages='Abort removal',
                                title=True)
                    sys.exit(1)

                remove_asd_manager = True
                if storage_router_to_remove_online is True:
                    client = SSHClient(endpoint=storage_router_to_remove,
                                       username='******')
                    if service_manager.has_service(name='asd-manager',
                                                   client=client):
                        remove_asd_manager = Interactive.ask_yesno(
                            message=
                            'Do you also want to remove the ASD manager and related ASDs?',
                            default_value=False)

                if remove_asd_manager is True or storage_router_to_remove_online is False:
                    for fct in Toolbox.fetch_hooks('noderemoval',
                                                   'validate_asd_removal'):
                        validation_output = fct(storage_router_to_remove.ip)
                        if validation_output['confirm'] is True:
                            if Interactive.ask_yesno(
                                    message=validation_output['question'],
                                    default_value=False) is False:
                                remove_asd_manager = False
                                break
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'Removal has been aborted during the confirmation step. No changes have been applied.',
                boxed=True,
                loglevel='warning')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages=[str(exception)],
                        boxed=True,
                        loglevel='exception')
            sys.exit(1)
        ###########
        # REMOVAL #
        ###########
        try:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Starting removal of node {0} - {1}'.format(
                            storage_router_to_remove.name,
                            storage_router_to_remove.ip))
            if storage_router_to_remove_online is False:
                Toolbox.log(
                    logger=NodeRemovalController._logger,
                    messages=
                    '  Marking all Storage Drivers served by Storage Router {0} as offline'
                    .format(storage_router_to_remove.ip))
                StorageDriverController.mark_offline(
                    storagerouter_guid=storage_router_to_remove.guid)

            # Remove vPools
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='  Removing vPools from node'.format(
                            storage_router_to_remove.ip))
            storage_routers_offline_guids = [
                sr.guid for sr in storage_routers_offline
                if sr.guid != storage_router_to_remove.guid
            ]
            for storage_driver in storage_router_to_remove.storagedrivers:
                Toolbox.log(logger=NodeRemovalController._logger,
                            messages='    Removing vPool {0} from node'.format(
                                storage_driver.vpool.name))
                VPoolController.shrink_vpool(
                    storagedriver_guid=storage_driver.guid,
                    offline_storage_router_guids=storage_routers_offline_guids)

            # Demote if MASTER
            if storage_router_to_remove.node_type == 'MASTER':
                NodeTypeController.demote_node(
                    cluster_ip=storage_router_to_remove.ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=storage_router_to_remove.machine_id,
                    unconfigure_memcached=internal_memcached,
                    unconfigure_rabbitmq=internal_rabbit_mq,
                    offline_nodes=storage_routers_offline)

            # Stop / remove services
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Stopping and removing services')
            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                NodeRemovalController.remove_services(
                    client=client,
                    node_type=storage_router_to_remove.node_type.lower(),
                    logger=NodeRemovalController._logger)
                service = 'watcher-config'
                if service_manager.has_service(service, client=client):
                    Toolbox.log(
                        logger=NodeRemovalController._logger,
                        messages='Removing service {0}'.format(service))
                    service_manager.stop_service(service, client=client)
                    service_manager.remove_service(service, client=client)

            Toolbox.run_hooks(component='noderemoval',
                              sub_component='remove',
                              logger=NodeRemovalController._logger,
                              cluster_ip=storage_router_to_remove.ip,
                              complete_removal=remove_asd_manager)

            # Clean up model
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Removing node from model')
            for service in storage_router_to_remove.services:
                service.delete()
            for disk in storage_router_to_remove.disks:
                for partition in disk.partitions:
                    partition.delete()
                disk.delete()
            for j_domain in storage_router_to_remove.domains:
                j_domain.delete()
            Configuration.delete('/ovs/framework/hosts/{0}'.format(
                storage_router_to_remove.machine_id))

            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map,
                offline_node_ips=[node.ip for node in storage_routers_offline],
                logger=NodeRemovalController._logger)

            if storage_router_to_remove_online is True:
                client = SSHClient(endpoint=storage_router_to_remove,
                                   username='******')
                client.file_delete(filenames=[CACC_LOCATION])
                client.file_delete(filenames=[CONFIG_STORE_LOCATION])
            storage_router_to_remove.delete()
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='Successfully removed node\n')
        except Exception as exception:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)
        except KeyboardInterrupt:
            Toolbox.log(logger=NodeRemovalController._logger, messages='\n')
            Toolbox.log(
                logger=NodeRemovalController._logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)

        if remove_asd_manager is True and storage_router_to_remove_online is True:
            Toolbox.log(logger=NodeRemovalController._logger,
                        messages='\nRemoving ASD Manager')
            with remote(storage_router_to_remove.ip, [os]) as rem:
                rem.os.system('asd-manager remove --force-yes')
        Toolbox.log(logger=NodeRemovalController._logger,
                    messages='Remove nodes finished',
                    title=True)
示例#21
0
    def promote_or_demote_node(node_action,
                               cluster_ip=None,
                               execute_rollback=False):
        """
        Switch a node between the MASTER and EXTRA role
        Demoting with a cluster IP targets a remote node which must be unreachable over SSH,
        every other combination works on the local node (127.0.0.1) after asking for its password.
        Failures inside the actual setup are logged, after which the process exits with code 1.
        :param node_action: Action to execute, either 'promote' or 'demote'
        :type node_action: str
        :param cluster_ip: IP of the node to promote or demote
        :type cluster_ip: str
        :param execute_rollback: Undo the action when it fails (otherwise only a rollback marker file is written)
        :type execute_rollback: bool
        :raises ValueError: When node_action is neither 'promote' nor 'demote'
        :return: None
        """
        if node_action not in ('promote', 'demote'):
            raise ValueError('Nodes can only be promoted or demoted')

        logger = NodeTypeController._logger
        promoting = node_action == 'promote'
        Toolbox.log(logger=logger,
                    messages='Open vStorage Setup - {0}'.format(
                        node_action.capitalize()),
                    boxed=True)
        try:
            Toolbox.log(logger=logger,
                        messages='Collecting information',
                        title=True)

            # The node running this code must have completed its own setup
            machine_id = System.get_my_machine_id()
            setup_completed_key = '/ovs/framework/hosts/{0}/setupcompleted'.format(
                machine_id)
            if Configuration.get(setup_completed_key) is False:
                raise RuntimeError('No local OVS setup found.')

            if cluster_ip:
                if not re.match(Toolbox.regex_ip, cluster_ip):
                    raise RuntimeError(
                        'Incorrect IP provided ({0})'.format(cluster_ip))
                # The role validation below applies to the node behind cluster_ip
                machine_id = System.get_my_machine_id(
                    SSHClient(endpoint=cluster_ip))

            node_type = Configuration.get(
                '/ovs/framework/hosts/{0}/type'.format(machine_id))
            if node_type not in ('MASTER', 'EXTRA'):
                raise RuntimeError('This node is not correctly configured.')
            if promoting and node_type == 'MASTER':
                raise RuntimeError('This node is already master.')
            if not promoting and node_type == 'EXTRA':
                raise RuntimeError('This node should be a master.')

            master_ip = None
            offline_nodes = []
            target_client = None
            if not promoting and cluster_ip:  # Demote an offline node
                from ovs.dal.lists.storagerouterlist import StorageRouterList
                from ovs.lib.storagedriver import StorageDriverController

                ip = cluster_ip
                unique_id = None
                ip_client_map = {}
                target_reachable = True
                for router in StorageRouterList.get_storagerouters():
                    try:
                        router_client = SSHClient(router.ip,
                                                  username='******')
                        if router.node_type == 'MASTER':
                            master_ip = router.ip
                        ip_client_map[router.ip] = router_client
                    except UnableToConnectException:
                        if router.ip == cluster_ip:
                            # This is the node to demote: remember it and flag its Storage Drivers
                            target_reachable = False
                            unique_id = router.machine_id
                            StorageDriverController.mark_offline(
                                storagerouter_guid=router.guid)
                        offline_nodes.append(router)
                if target_reachable:
                    raise RuntimeError(
                        "If the node is online, please use 'ovs setup demote' executed on the node you wish to demote"
                    )
                if master_ip is None:
                    raise RuntimeError(
                        'Failed to retrieve another responsive MASTER node')

            else:
                target_password = Toolbox.ask_validate_password(
                    ip='127.0.0.1', logger=logger)
                target_client = SSHClient('127.0.0.1',
                                          username='******',
                                          password=target_password)

                unique_id = System.get_my_machine_id(target_client)
                ip = Configuration.get(
                    '/ovs/framework/hosts/{0}/ip'.format(unique_id))

                storagerouter_info = NodeTypeController.retrieve_storagerouter_info_via_host(
                    ip=target_client.ip, password=target_password)
                node_ips = [
                    info['ip'] for info in storagerouter_info.itervalues()
                ]
                # Masters other than the node being handled
                master_node_ips = [
                    info['ip'] for info in storagerouter_info.itervalues()
                    if info['type'] == 'master' and info['ip'] != ip
                ]
                if not master_node_ips:
                    raise RuntimeError(
                        'No master node could be found' if promoting else
                        'It is not possible to remove the only master')

                master_ip = master_node_ips[0]
                ip_client_map = {}
                for node_ip in node_ips:
                    ip_client_map[node_ip] = SSHClient(node_ip,
                                                       username='******')

            if not promoting:
                # Refuse to demote when the node hosts an internally managed single node Arakoon cluster
                for cluster_name in Configuration.list('/ovs/arakoon'):
                    config = ArakoonClusterConfig(cluster_id=cluster_name)
                    arakoon_client = ArakoonInstaller.build_client(config)
                    metadata = json.loads(
                        arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    sole_node_is_target = len(
                        config.nodes) == 1 and config.nodes[0].ip == ip
                    if sole_node_is_target and metadata.get(
                            'internal') is True:
                        raise RuntimeError(
                            'Demote is not supported when single node Arakoon cluster(s) are present on the node to be demoted.'
                        )

            configure_rabbitmq = Toolbox.is_service_internally_managed(
                service='rabbitmq')
            configure_memcached = Toolbox.is_service_internally_managed(
                service='memcached')

            def _promote():
                NodeTypeController.promote_node(
                    cluster_ip=ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=unique_id,
                    configure_memcached=configure_memcached,
                    configure_rabbitmq=configure_rabbitmq)

            def _demote():
                NodeTypeController.demote_node(
                    cluster_ip=ip,
                    master_ip=master_ip,
                    ip_client_map=ip_client_map,
                    unique_id=unique_id,
                    unconfigure_memcached=configure_memcached,
                    unconfigure_rabbitmq=configure_rabbitmq,
                    offline_nodes=offline_nodes)

            # The rollback of an action is always the opposite action
            if promoting:
                execute, revert, revert_action = _promote, _demote, 'demote'
            else:
                execute, revert, revert_action = _demote, _promote, 'promote'
            try:
                execute()
            except Exception:
                if execute_rollback is True:
                    revert()
                elif target_client is not None:
                    # Leave a marker naming the action which would undo the failed one
                    target_client.file_write('/tmp/ovs_rollback',
                                             revert_action)
                raise

            Toolbox.log(logger=logger, messages='\n')
            Toolbox.log(logger=logger,
                        messages='{0} complete.'.format(
                            node_action.capitalize()),
                        boxed=True)
        except KeyboardInterrupt:
            Toolbox.log(logger=logger, messages='\n')
            Toolbox.log(
                logger=logger,
                messages=
                'This setup was aborted. Open vStorage may be in an inconsistent state, make sure to validate the installation.',
                boxed=True,
                loglevel='error')
            sys.exit(1)
        except Exception as exception:
            Toolbox.log(logger=logger, messages='\n')
            Toolbox.log(
                logger=logger,
                messages=['An unexpected error occurred:',
                          str(exception)],
                boxed=True,
                loglevel='exception')
            sys.exit(1)