Example #1
    def list(self, vpool_guid=None):
        """
        Overview of all StorageDrivers
        :param vpool_guid: Guid of the vPool
        :type vpool_guid: str
        :return: All StorageDrivers, or only those belonging to the given vPool
        :rtype: list
        """
        if vpool_guid is not None:
            return VPool(vpool_guid).storagedrivers
        return StorageDriverList.get_storagedrivers()
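
A minimal usage sketch of the same two lookups, assuming an environment where the Open vStorage DAL is installed; the guid is a placeholder, not a real object:

from ovs.dal.hybrids.vpool import VPool
from ovs.dal.lists.storagedriverlist import StorageDriverList

# All StorageDrivers known to the model
print [sd.name for sd in StorageDriverList.get_storagedrivers()]
# Only the StorageDrivers backing one vPool ('<vpool_guid>' is hypothetical)
print [sd.name for sd in VPool('<vpool_guid>').storagedrivers]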
Example #2
def getEdgeconnection(vpoolname=VPOOLNAME):
    protocol = getEdgeProtocol()
    storagedrivers = list(StorageDriverList.get_storagedrivers())
    random.shuffle(storagedrivers)
    for storagedriver in storagedrivers:
        if storagedriver.status == 'FAILURE':
            continue
        if (vpoolname is not None and storagedriver.vpool.name == vpoolname) or \
                (vpoolname is None and storagedriver.vpool.name != VPOOLNAME):
            return storagedriver.storage_ip, storagedriver.ports['edge'], protocol
    return None, None, protocol
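
A hedged usage sketch, assuming the VPOOLNAME constant and the getEdgeProtocol() helper from this example are in scope; 'myvpool' is a placeholder vPool name:

ip, port, protocol = getEdgeconnection(vpoolname='myvpool')
if ip is None:
    raise RuntimeError('No running StorageDriver found for the requested vPool')
print 'Edge endpoint: {0}://{1}:{2}'.format(protocol, ip, port)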
Example #3
    def create(self):
        """
        Prepares a new Storagedriver for a given vPool and Storagerouter
        :return: None
        :rtype: NoneType
        """
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')

        machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
        port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
        storagerouter = self.sr_installer.storagerouter
        with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
            model_ports_in_use = []
            for sd in StorageDriverList.get_storagedrivers():
                if sd.storagerouter_guid == storagerouter.guid:
                    model_ports_in_use += sd.ports.values()
                    for proxy in sd.alba_proxies:
                        model_ports_in_use.append(proxy.service.ports[0])
            ports = System.get_free_ports(selected_range=port_range, exclude=model_ports_in_use, amount=4 + self.sr_installer.requested_proxies, client=self.sr_installer.root_client)

            vpool = self.vp_installer.vpool
            vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
            storagedriver = StorageDriver()
            storagedriver.name = vrouter_id.replace('_', ' ')
            storagedriver.ports = {'management': ports[0],
                                   'xmlrpc': ports[1],
                                   'dtl': ports[2],
                                   'edge': ports[3]}
            storagedriver.vpool = vpool
            storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
            storagedriver.storage_ip = self.storage_ip
            storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
            storagedriver.description = storagedriver.name
            storagedriver.storagerouter = storagerouter
            storagedriver.storagedriver_id = vrouter_id
            storagedriver.save()

            # ALBA Proxies
            proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
            for proxy_id in xrange(self.sr_installer.requested_proxies):
                service = Service()
                service.storagerouter = storagerouter
                service.ports = [ports[4 + proxy_id]]
                service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
                service.type = proxy_service_type
                service.save()
                alba_proxy = AlbaProxy()
                alba_proxy.service = service
                alba_proxy.storagedriver = storagedriver
                alba_proxy.save()
        self.storagedriver = storagedriver
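
To illustrate the port bookkeeping above: four ports go to the StorageDriver itself and one extra port to each requested ALBA proxy. A small self-contained sketch with hypothetical port numbers:

ports = [26203, 26204, 26205, 26206, 26207, 26208]  # hypothetical result of System.get_free_ports
requested_proxies = 2
sd_ports = {'management': ports[0], 'xmlrpc': ports[1], 'dtl': ports[2], 'edge': ports[3]}
proxy_ports = [ports[4 + proxy_id] for proxy_id in xrange(requested_proxies)]
print sd_ports     # management/xmlrpc/dtl/edge -> 26203..26206 (dict order may vary)
print proxy_ports  # [26207, 26208]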
Example #4
    def get_storagedrivers_in_same_domain(domain_guid):
        """
        Get the StorageDrivers whose StorageRouter is part of a given domain

        :param domain_guid: guid of a domain
        :type domain_guid: str
        :return: list of StorageDrivers
        :rtype: list
        """
        return [
            storagedriver
            for storagedriver in StorageDriverList.get_storagedrivers()
            if domain_guid in storagedriver.storagerouter.regular_domains
        ]
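
A hedged usage sketch; '<domain_guid>' is a placeholder and the function is assumed to be callable from wherever its helper class is imported:

drivers = get_storagedrivers_in_same_domain('<domain_guid>')
print [storagedriver.name for storagedriver in drivers]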
Example #5
    def delete_snapshots(timestamp=None):
        # type: (float) -> GroupResult
        """
        Delete snapshots based on the retention policy
        Offloads concurrency to celery
        Returns a GroupResult. Waiting for the result can be done using result.get()
        :param timestamp: Timestamp to determine whether snapshots should be kept or not, if none provided, current time will be used
        :type timestamp: float
        :return: The GroupResult
        :rtype: GroupResult
        """
        # The result cannot be fetched in this task
        group_id = uuid()
        return group(GenericController.delete_snapshots_storagedriver.s(storagedriver.guid, timestamp, group_id)
                     for storagedriver in StorageDriverList.get_storagedrivers()).apply_async(task_id=group_id)
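
Waiting for the dispatched tasks could look like the sketch below; it assumes the method is reachable as GenericController.delete_snapshots() and uses propagate=False so one failing subtask does not abort the wait:

result = GenericController.delete_snapshots()  # retention reference defaults to the current time
result.get(propagate=False)                    # block until every per-StorageDriver task finished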
Example #6
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with the given hypervisor vMachine information
        :param vmachine: Virtual Machine to update
        :param vm_object: New virtual machine info
        :param pmachine: Physical machine of the virtual machine
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
            for disk in vm_object['disks']:
                ensure_safety = False
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        try:
                            mutex.acquire(wait=10)
                            vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                            if vdisk is None:
                                # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                                vdisk = VDisk()
                                vdisk.vpool = datastores[datastore].vpool
                                vdisk.reload_client()
                                vdisk.devicename = disk['filename']
                                vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                                vdisk.size = vdisk.info['volume_size']
                                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                                # Create the disk in a locked context, but don't execute long running-task in same context
                                vdisk.save()
                                ensure_safety = True
                        finally:
                            mutex.release()
                        if ensure_safety:
                            MDSServiceController.ensure_safety(vdisk)
                            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            VMachineController._logger.error('Error during vMachine update: {0}'.format(str(ex)))
            raise
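
A worked example of the datastore key built above, with illustrative values; the same '<ip>:<mountpoint>' string is what the hypervisor reports for its datastore, which is how each disk is traced back to its StorageDriver:

storage_ip, mountpoint = '10.100.1.10', '/mnt/myvpool'  # hypothetical values
print '{0}:{1}'.format(storage_ip, mountpoint)  # -> '10.100.1.10:/mnt/myvpool'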
Example #7
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they get eventually
        executed. This code will typically contain:
        * "dangerous" migration code (it needs certain running services)
        * Migration code depending on a cluster-wide state
        * ...
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.generic import GenericController

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter,
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client, package_name='arakoon', old_service_name=service_name)
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_service_name=old_service_name,
                                                    new_service_name=new_service_name)

                # Register new service and remove old service
                service_manager.add_service(name='ovs-albaproxy',
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get('fragment_cache', ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config['fragment_cache'] == ['none']:
                            proxy_scrub_config['fragment_cache'] = fragment_cache_scrub_info
                            Configuration.set(proxy_scrub_config_key,
                                              json.dumps(proxy_scrub_config, indent=4),
                                              raw=True)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration('storagedriver', vpool.guid, storagedriver.storagedriver_id)
            storagedriver_config.load()
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_service_name='volumedriver_{0}'.format(vpool.name))
                if service_manager.ImplementationClass == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
        for vpool in VPoolList.get_vpools():
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        MigrationController._logger.info('Finished out of band migrations')
        GenericController.refresh_package_information()
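
A condensed sketch of what the 'Migrate to multiple proxies' step does to the 'backend_connection_manager' section; the values are illustrative, not taken from a real config:

# Before: one flat section describing a single proxy
before = {'backend_type': 'ALBA',
          'alba_connection_host': '127.0.0.1',
          'alba_connection_port': 26204}
# After: 'MULTI' with one numbered sub-section per proxy; RORA settings are
# added per proxy and any backend_interface_* keys are hoisted to the top level
after = {'backend_type': 'MULTI',
         '0': {'backend_type': 'ALBA',
               'alba_connection_host': '127.0.0.1',
               'alba_connection_port': 26204,
               'alba_connection_use_rora': True,
               'alba_connection_rora_manifest_cache_capacity': 5000},
         'backend_interface_retries_on_error': 5,
         'backend_interface_retry_interval_secs': 1,
         'backend_interface_retry_backoff_multiplier': 2.0}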
Example #8
    def gather_scrub_work():
        logger.info("Divide scrubbing work among allowed Storage Routers")

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info(
                        "Scrub partition found on Storage Router {0}: {1}".format(storage_driver.name, partition.folder)
                    )
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter.ip)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning("StorageRouter {0} is not reachable".format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError("No scrub locations found")

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info["object_type"] in ["BASE"] and len(vdisk.child_vdisks) == 0:
                vdisk_guids.add(vdisk.guid)

        logger.info("Found {0} virtual disks which need to be check for scrub work".format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info(
                "Executing scrub work on {0} Storage Router {1} for {2} virtual disks".format(
                    "local" if local is True else "remote", storage_router.name, len(vdisk_guids_to_scrub)
                )
            )

            if local is True:
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(
                    ScheduledTaskController._execute_scrub_work.s(
                        scrub_location=scrub_info[1], vdisk_guids=vdisk_guids_to_scrub
                    ).apply_async(routing_key="sr.{0}".format(storage_router.machine_id))
                )
                storage_router_list.append(storage_router)
                logger.info("Launched scrub task on Storage Router {0}".format(storage_router.name))

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            ScheduledTaskController._execute_scrub_work(
                scrub_location=local_scrub_location, vdisk_guids=local_vdisks_to_scrub
            )
        all_results = result_set.join(
            propagate=False
        )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if result is not None:
                logger.error(
                    "Scrubbing failed on Storage Router {0} with error {1}".format(
                        storage_router_list[index].name, result
                    )
                )
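
The start/end index arithmetic above relies on Python 2 integer division to spread the vDisk guids roughly evenly over the scrub locations; a small worked example:

# 10 vDisks spread over 3 scrub locations
vdisk_count, locations = 10, 3
for index in xrange(locations):
    start_index = index * vdisk_count / locations
    end_index = (index + 1) * vdisk_count / locations
    print index, range(start_index, end_index)  # 0 -> [0, 1, 2], 1 -> [3, 4, 5], 2 -> [6, 7, 8, 9]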
Example #9
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - The datastore is still empty, add defaults
        if working_version < 1:
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.backendtype import BackendType
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(
                random.choice(string.ascii_letters + string.digits +
                              '|_=+*#@!/-[]{}<>.?,\'";:~') for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [(admin_group, [read_role, write_role, manage_role]),
                       (viewers_group, [read_role])]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add backends
            for backend_type_info in [('Ceph', 'ceph_s3'),
                                      ('Amazon', 'amazon_s3'),
                                      ('Swift', 'swift_s3'),
                                      ('Local', 'local'),
                                      ('Distributed', 'distributed'),
                                      ('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in [
                    ServiceType.SERVICE_TYPES.MD_SERVER,
                    ServiceType.SERVICE_TYPES.ALBA_PROXY,
                    ServiceType.SERVICE_TYPES.ARAKOON
            ]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

            # Failure Domain
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()

            # We're now at version 1
            working_version = 1

        # Version 2 introduced:
        # - new Descriptor format
        if working_version < 2:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                        filename = data[entry]['source']
                        if not filename.startswith('/'):
                            filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                        module = imp.load_source(data[entry]['name'], filename)
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            # We're now at version 2
            working_version = 2

        # Version 3 introduced:
        # - new Descriptor format
        if working_version < 3:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry]:
                        module = imp.load_source(data[entry]['name'], data[entry]['source'])
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            working_version = 3

        # Version 4 introduced:
        # - Flexible SSD layout
        if working_version < 4:
            import os
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
            for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
                mds_service = service.mds_service
                storagedriver = None
                for current_storagedriver in service.storagerouter.storagedrivers:
                    if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                        storagedriver = current_storagedriver
                        break
                tasks = {}
                if storagedriver._data.get('mountpoint_md'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.DB,
                                                                           StorageDriverPartition.SUBROLE.MDS)
                if storagedriver._data.get('mountpoint_temp'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.SCRUB,
                                                                           StorageDriverPartition.SUBROLE.MDS)
                for disk in service.storagerouter.disks:
                    for partition in disk.partitions:
                        for directory, (role, subrole) in tasks.iteritems():
                            with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                stat_dir = directory
                                while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                    stat_dir = stat_dir.rsplit('/', 1)[0]
                                    if not stat_dir:
                                        stat_dir = '/'
                                inode = rem.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})
            for storagedriver in StorageDriverList.get_storagedrivers():
                migrated_objects = {}
                for disk in storagedriver.storagerouter.disks:
                    for partition in disk.partitions:
                        # Process all mountpoints that are unique and don't have a specified size
                        for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                                'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                     'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                     'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                     'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                            if key in storagedriver._data:
                                is_list = isinstance(storagedriver._data[key], list)
                                entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                                for entry in entries:
                                    if not entry:
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                    else:
                                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                            inode = rem.os.stat(entry).st_dev
                                        if partition.inode == inode:
                                            if role not in partition.roles:
                                                partition.roles.append(role)
                                                partition.save()
                                            for folder, subrole in sr_info.iteritems():
                                                number = 0
                                                migrated = False
                                                for sd_partition in storagedriver.partitions:
                                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                        if sd_partition.partition_guid == partition.guid:
                                                            number = max(sd_partition.number, number)
                                                if migrated is False:
                                                    sd_partition = StorageDriverPartition()
                                                    sd_partition.role = role
                                                    sd_partition.sub_role = subrole
                                                    sd_partition.partition = partition
                                                    sd_partition.storagedriver = storagedriver
                                                    sd_partition.size = None
                                                    sd_partition.number = number + 1
                                                    sd_partition.save()
                                                    if folder:
                                                        source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                    else:
                                                        source = entry
                                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                                    path = sd_partition.path.rsplit('/', 1)[0]
                                                    if path:
                                                        client.dir_create(path)
                                                        client.dir_chown(path, 'ovs', 'ovs')
                                                    client.symlink({sd_partition.path: source})
                                                    migrated_objects[source] = sd_partition
                                            if is_list:
                                                storagedriver._data[key].remove(entry)
                                                if len(storagedriver._data[key]) == 0:
                                                    del storagedriver._data[key]
                                            else:
                                                del storagedriver._data[key]
                                            storagedriver.save()
                if 'mountpoint_bfs' in storagedriver._data:
                    storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                    if not storagedriver.mountpoint_dfs:
                        storagedriver.mountpoint_dfs = None
                    del storagedriver._data['mountpoint_bfs']
                    storagedriver.save()
                if 'mountpoint_temp' in storagedriver._data:
                    del storagedriver._data['mountpoint_temp']
                    storagedriver.save()
                if migrated_objects:
                    print 'Loading sizes'
                    config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                    config.load()
                    for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                        path = readcache.get('path', '').rsplit('/', 1)[0]
                        size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()
                    for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                        path = writecache.get('path', '')
                        size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()

            working_version = 4

        # Version 5 introduced:
        # - Failure Domains
        if working_version < 5:
            import os
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.lists.failuredomainlist import FailureDomainList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            failure_domains = FailureDomainList.get_failure_domains()
            if len(failure_domains) > 0:
                failure_domain = failure_domains[0]
            else:
                failure_domain = FailureDomain()
                failure_domain.name = 'Default'
                failure_domain.save()
            for storagerouter in StorageRouterList.get_storagerouters():
                change = False
                if storagerouter.primary_failure_domain is None:
                    storagerouter.primary_failure_domain = failure_domain
                    change = True
                if storagerouter.rdma_capable is None:
                    client = SSHClient(storagerouter, username='******')
                    rdma_capable = False
                    with remote(client.ip, [os], username='******') as rem:
                        for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                            for directory in dirs:
                                ports_dir = '/'.join([root, directory, 'ports'])
                                if not rem.os.path.exists(ports_dir):
                                    continue
                                for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                    if sub_root != ports_dir:
                                        continue
                                    for sub_directory in sub_dirs:
                                        state_file = '/'.join([sub_root, sub_directory, 'state'])
                                        if rem.os.path.exists(state_file):
                                            if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                                rdma_capable = True
                    storagerouter.rdma_capable = rdma_capable
                    change = True
                if change is True:
                    storagerouter.save()

            working_version = 5

        # Version 6 introduced:
        # - Distributed scrubbing
        if working_version < 6:
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.sshclient import SSHClient
            for storage_driver in StorageDriverList.get_storagedrivers():
                root_client = SSHClient(storage_driver.storagerouter,
                                        username='******')
                for partition in storage_driver.partitions:
                    if partition.role == DiskPartition.ROLES.SCRUB:
                        old_path = partition.path
                        partition.sub_role = None
                        partition.save()
                        partition.invalidate_dynamics(['folder', 'path'])
                        if root_client.dir_exists(partition.path):
                            continue  # New directory already exists
                        if '_mds_' in old_path:
                            if root_client.dir_exists(old_path):
                                root_client.symlink({partition.path: old_path})
                        if not root_client.dir_exists(partition.path):
                            root_client.dir_create(partition.path)
                        root_client.dir_chmod(partition.path, 0777)

            working_version = 6

        # Version 7 introduced:
        # - vPool status
        if working_version < 7:
            from ovs.dal.hybrids import vpool
            reload(vpool)
            from ovs.dal.hybrids.vpool import VPool
            from ovs.dal.lists.vpoollist import VPoolList
            for _vpool in VPoolList.get_vpools():
                vpool = VPool(_vpool.guid)
                if hasattr(vpool, 'status') and vpool.status is None:
                    vpool.status = VPool.STATUSES.RUNNING
                    vpool.save()

            working_version = 7

        # Version 10 introduced:
        # - Reverse indexes are stored in persistent store
        # - Store more non-changing metadata on disk iso using a dynamic property
        if working_version < 10:
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.datalist import DataList
            from ovs.extensions.storage.persistentfactory import PersistentFactory
            from ovs.extensions.storage.volatilefactory import VolatileFactory
            persistent = PersistentFactory.get_client()
            for prefix in ['ovs_listcache', 'ovs_reverseindex']:
                for key in persistent.prefix(prefix):
                    persistent.delete(key)
            for key in persistent.prefix('ovs_data_'):
                persistent.set(key, persistent.get(key))
            base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                all_objects = DataList(cls, {
                    'type': DataList.where_operator.AND,
                    'items': []
                })
                for item in all_objects:
                    guid = item.guid
                    for relation in item._relations:
                        if relation.foreign_type is None:
                            rcls = cls
                            rclsname = rcls.__name__.lower()
                        else:
                            rcls = relation.foreign_type
                            rclsname = rcls.__name__.lower()
                        key = relation.name
                        rguid = item._data[key]['guid']
                        if rguid is not None:
                            reverse_key = base_reverse_key.format(
                                rclsname, rguid, relation.foreign_key, guid)
                            persistent.set(reverse_key, 0)
            volatile = VolatileFactory.get_client()
            try:
                volatile._client.flush_all()  # Drop the volatile cache entirely; it repopulates on demand
            except Exception:
                pass  # A failed cache flush is harmless here; stale entries simply expire
            from ovs.dal.lists.vdisklist import VDiskList
            for vdisk in VDiskList.get_vdisks():
                try:
                    vdisk.metadata = {
                        'lba_size': vdisk.info['lba_size'],
                        'cluster_multiplier': vdisk.info['cluster_multiplier']
                    }
                    vdisk.save()
                except Exception:
                    pass  # Best effort: vdisks whose volumedriver info is unavailable keep their old metadata

            working_version = 10

        # Version 11 introduced:
        # - ALBA accelerated ALBA, meaning different vpool.metadata information
        if working_version < 11:
            from ovs.dal.lists.vpoollist import VPoolList

            for vpool in VPoolList.get_vpools():
                vpool.metadata = {'backend': vpool.metadata}
                if 'metadata' in vpool.metadata['backend']:
                    vpool.metadata['backend'][
                        'arakoon_config'] = vpool.metadata['backend'].pop(
                            'metadata')
                if 'backend_info' in vpool.metadata['backend']:
                    vpool.metadata['backend']['backend_info'][
                        'fragment_cache_on_read'] = True
                    vpool.metadata['backend']['backend_info'][
                        'fragment_cache_on_write'] = False
                vpool.save()
            working_version = 11

        return working_version
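The version-11 step above nests the existing vPool metadata under a new 'backend' key and renames the embedded Arakoon section. A minimal sketch of that reshaping, using placeholder key contents:

# Placeholder metadata standing in for a real vPool's dict.
old_metadata = {'metadata': {'cluster_id': 'abm'},
                'backend_info': {'policies': []}}

metadata = {'backend': old_metadata}
if 'metadata' in metadata['backend']:
    metadata['backend']['arakoon_config'] = metadata['backend'].pop('metadata')
if 'backend_info' in metadata['backend']:
    metadata['backend']['backend_info']['fragment_cache_on_read'] = True
    metadata['backend']['backend_info']['fragment_cache_on_write'] = False

assert metadata == {'backend': {'arakoon_config': {'cluster_id': 'abm'},
                                'backend_info': {'policies': [],
                                                 'fragment_cache_on_read': True,
                                                 'fragment_cache_on_write': False}}}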
Example No. 10
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with data from a given hypervisor vMachine object
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(
                    MessageController.Type.EVENT, {
                        'type': 'vmachine_created',
                        'metadata': {
                            'name': vm_object['name']
                        }
                    })
            elif vmachine.name != vm_object['name']:
                MessageController.fire(
                    MessageController.Type.EVENT, {
                        'type': 'vmachine_renamed',
                        'metadata': {
                            'old_name': vmachine.name,
                            'new_name': vm_object['name']
                        }
                    })
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{}:{}'.format(storagedriver.storage_ip,
                                               storagedriver.mountpoint),
                                storagedriver)
                               for storagedriver in storagedrivers])
            vdisk_guids = []
            for disk in vm_object['disks']:
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        vdisk = VDiskList.get_by_devicename_and_vpool(
                            disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(
                                str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                            MDSServiceController.ensure_safety(vdisk)
                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(
                                MessageController.Type.EVENT, {
                                    'type': 'vdisk_attached',
                                    'metadata': {
                                        'vmachine_name': vmachine.name,
                                        'vdisk_name': disk['name']
                                    }
                                })
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(
                        MessageController.Type.EVENT, {
                            'type': 'vdisk_detached',
                            'metadata': {
                                'vmachine_name': vmachine.name,
                                'vdisk_name': vdisk.name
                            }
                        })
                    vdisk.vmachine = None
                    vdisk.save()

            logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(vmachine.name, vdisks_synced))
        except Exception as ex:
            logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
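The datastore lookup above keys each StorageDriver by 'storage_ip:mountpoint', so the datastore string reported by the hypervisor resolves straight to a driver. A toy illustration with made-up values:

# Dicts standing in for StorageDriver objects; only the two fields that make
# up the lookup key are modelled.
storagedrivers = [{'storage_ip': '10.100.1.1', 'mountpoint': '/mnt/vpool1'},
                  {'storage_ip': '10.100.1.2', 'mountpoint': '/mnt/vpool2'}]
datastores = dict(('{}:{}'.format(sd['storage_ip'], sd['mountpoint']), sd)
                  for sd in storagedrivers)

assert datastores['10.100.1.1:/mnt/vpool1'] is storagedrivers[0]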
Example No. 11
    def _bootstrap_dal_models(self):
        """
        Load/hook dal models as snmp oids
        """
        _guids = set()

        enabled_key = "{0}_config_dal_enabled".format(STORAGE_PREFIX)
        self.instance_oid = 0
        try:
            enabled = self.persistent.get(enabled_key)
        except KeyNotFoundException:
            enabled = True  # Enabled by default, can be disabled by setting the key
        if enabled:
            from ovs.dal.lists.vdisklist import VDiskList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.dal.lists.pmachinelist import PMachineList
            from ovs.dal.lists.vmachinelist import VMachineList
            from ovs.dal.lists.vpoollist import VPoolList
            from ovs.dal.lists.storagedriverlist import StorageDriverList

            for storagerouter in StorageRouterList.get_storagerouters():
                _guids.add(storagerouter.guid)
                if not self._check_added(storagerouter):
                    self._register_dal_model(10, storagerouter, 'guid', "0")
                    self._register_dal_model(10, storagerouter, 'name', "1")
                    self._register_dal_model(10, storagerouter, 'pmachine', "3", key = 'host_status')
                    self._register_dal_model(10, storagerouter, 'description', "4")
                    self._register_dal_model(10, storagerouter, 'devicename', "5")
                    self._register_dal_model(10, storagerouter, 'dtl_mode', "6")
                    self._register_dal_model(10, storagerouter, 'ip', "8")
                    self._register_dal_model(10, storagerouter, 'machine_id', "9")
                    self._register_dal_model(10, storagerouter, 'status', "10")
                    # Pair each vdisk with its own storagedriver instead of comparing
                    # against a leaked comprehension variable.
                    self._register_dal_model(10, storagerouter, '#vdisks', "11",
                                             func = lambda storagerouter: len([vdisk for sd in storagerouter.storagedrivers for vdisk in sd.vpool.vdisks if vdisk.storagedriver_id == sd.storagedriver_id]),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#vmachines', "12",
                                             func = lambda storagerouter: len(set(vdisk.vmachine.guid for sd in storagerouter.storagedrivers for vdisk in sd.vpool.vdisks if vdisk.vmachine is not None and vdisk.storagedriver_id == sd.storagedriver_id)),
                                             atype = int)
                    self._register_dal_model(10, storagerouter, '#stored_data', "13",
                                             func = lambda storagerouter: sum(vdisk.vmachine.stored_data for sd in storagerouter.storagedrivers for vdisk in sd.vpool.vdisks if vdisk.vmachine is not None and vdisk.storagedriver_id == sd.storagedriver_id),
                                             atype = int)
                    self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if vm.is_vtemplate:
                        self._register_dal_model(11, vm, 'guid', "0")
                        self._register_dal_model(11, vm, 'name', "1")

                        def _children(vmt):
                            # Count vdisks of non-template machines that were cloned
                            # from one of this template's disks.
                            template_disk_guids = set(vd.guid for vd in vmt.vdisks)
                            children = 0
                            for machine in VMachineList.get_vmachines():
                                if machine.is_vtemplate:
                                    continue
                                for vdisk in machine.vdisks:
                                    if vdisk.parent_vdisk_guid in template_disk_guids:
                                        children += 1
                            return children
                        self._register_dal_model(11, vm, '#children', "2", func = _children, atype = int)
                        self.instance_oid += 1

            for vm in VMachineList.get_vmachines():
                _guids.add(vm.guid)
                if not self._check_added(vm):
                    if not vm.is_vtemplate:
                        self._register_dal_model(0, vm, 'guid', "0")
                        self._register_dal_model(0, vm, 'name', "1")
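                        # Sub-OIDs 2.0 - 2.34 expose the individual statistics counters.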
                        self._register_dal_model(0, vm, 'statistics', "2.0", key = "operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.2", key = "data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.6", key = "write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.11", key = "backend_data_read", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.12", key = "cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.16", key = "backend_data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.17", key = "data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.18", key = "read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.20", key = "data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.23", key = "timestamp", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.27", key = "data_written", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.30", key = "operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                        self._register_dal_model(0, vm, 'statistics', "2.34", key = "data_transferred", atype = int)
                        self._register_dal_model(0, vm, 'stored_data', "3", atype = int)
                        self._register_dal_model(0, vm, 'description', "4")
                        self._register_dal_model(0, vm, 'devicename', "5")
                        self._register_dal_model(0, vm, 'dtl_mode', "6")
                        self._register_dal_model(0, vm, 'hypervisor_id', "7")
                        self._register_dal_model(0, vm, 'ip', "8")
                        self._register_dal_model(0, vm, 'status', "10")  # stored_data is already exposed at sub-OID "3"
                        self._register_dal_model(0, vm, 'snapshots', "11", atype = int)
                        self._register_dal_model(0, vm, 'vdisks', "12", atype = int)
                        self._register_dal_model(0, vm, 'DTL', '13',
                                                 func = lambda vm: 'DEGRADED' if len(vm.vdisks) > 0 and all(vd.info['failover_mode'] == 'DEGRADED' for vd in vm.vdisks) else 'OK')
                    self.instance_oid += 1

            for vd in VDiskList.get_vdisks():
                _guids.add(vd.guid)
                if not self._check_added(vd):
                    self._register_dal_model(1, vd, 'guid', "0")
                    self._register_dal_model(1, vd, 'name', "1")
                    self._register_dal_model(1, vd, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.1", key = "data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.20", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(1, vd, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(1, vd, 'info', "3", key = 'stored', atype = int)
                    self._register_dal_model(1, vd, 'info', "4", key = 'failover_mode', atype = int)
                    self._register_dal_model(1, vd, 'snapshots', "5", atype = int)
                    self.instance_oid += 1

            for pm in PMachineList.get_pmachines():
                _guids.add(pm.guid)
                if not self._check_added(pm):
                    self._register_dal_model(2, pm, 'guid', "0")
                    self._register_dal_model(2, pm, 'name', "1")
                    self._register_dal_model(2, pm, 'host_status', "2")
                    self.instance_oid += 1

            for vp in VPoolList.get_vpools():
                _guids.add(vp.guid)
                if not self._check_added(vp):
                    self._register_dal_model(3, vp, 'guid', "0")
                    self._register_dal_model(3, vp, 'name', "1")
                    self._register_dal_model(3, vp, 'statistics', "2.0", key = "operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.1", key = "cluster_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.2", key = "data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.3", key = "sco_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.4", key = "sco_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.5", key = "sco_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.6", key = "write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.7", key = "cluster_cache_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.8", key = "read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.9", key = "sco_cache_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.10", key = "backend_write_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.11", key = "backend_data_read", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.12", key = "cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.13", key = "backend_write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.14", key = "metadata_store_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.15", key = "metadata_store_misses", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.16", key = "backend_data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.17", key = "data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.18", key = "read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.19", key = "cluster_cache_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.20", key = "data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.21", key = "cluster_cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.22", key = "cache_hits_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.23", key = "timestamp", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.24", key = "metadata_store_misses_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.25", key = "backend_data_written_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.26", key = "backend_read_operations", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.27", key = "data_written", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.28", key = "metadata_store_hits", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.29", key = "backend_data_read_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.30", key = "operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.31", key = "backend_read_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.32", key = "data_transferred_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.33", key = "write_operations_ps", atype = int)
                    self._register_dal_model(3, vp, 'statistics', "2.34", key = "data_transferred", atype = int)
                    self._register_dal_model(3, vp, 'status', "3")
                    self._register_dal_model(3, vp, 'description', "4")
                    self._register_dal_model(3, vp, 'vdisks', "5", atype = int)
                    self._register_dal_model(3, vp, '#vmachines', "6",
                                             func = lambda vp: len(set(vd.vmachine.guid for vd in vp.vdisks if vd.vmachine is not None)),
                                             atype = int)
                    self.instance_oid += 1

            for storagedriver in StorageDriverList.get_storagedrivers():
                _guids.add(storagedriver.guid)
                if not self._check_added(storagedriver):
                    self._register_dal_model(4, storagedriver, 'guid', "0")
                    self._register_dal_model(4, storagedriver, 'name', "1")
                    self._register_dal_model(4, storagedriver, 'stored_data', "2", atype = int)
                    self.instance_oid += 1

            try:
                # try to load OVS Backends
                from ovs.dal.lists.albabackendlist import AlbaBackendList
                for backend in AlbaBackendList.get_albabackends():
                    _guids.add(backend.guid)
                    if not self._check_added(backend):
                        self._register_dal_model(5, backend, 'guid', 0)
                        self._register_dal_model(5, backend, 'name', 1)
                        for disk_id in range(len(backend.all_disks)):
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.0'.format(disk_id), key = "name", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.1'.format(disk_id), key = "usage.size", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.2'.format(disk_id), key = "usage.used", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.3'.format(disk_id), key = "usage.available", atype = long, index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.4'.format(disk_id), key = "state.state", index=disk_id)
                            self._register_dal_model(5, backend, 'all_disks', '2.{0}.5'.format(disk_id), key = "node_id", index=disk_id)

                        self.instance_oid += 1
            except ImportError:
                print('OVS Backend not present')
            reload_required = False
            for object_guid in list(self.model_oids):
                if object_guid not in _guids:
                    self.model_oids.remove(object_guid)
                    reload_required = True
            if reload_required:
                self._reload_snmp()
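For orientation, the first argument of every _register_dal_model call above selects the SNMP subtree of one DAL model. The mapping below is inferred from the registrations in this method, not from an official MIB:

# Class indexes as used by the registrations above (inferred, not normative).
MODEL_CLASS_OIDS = {0: 'vmachine',
                    1: 'vdisk',
                    2: 'pmachine',
                    3: 'vpool',
                    4: 'storagedriver',
                    5: 'alba_backend',
                    10: 'storagerouter',
                    11: 'vtemplate'}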
Example No. 12
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with data from a given hypervisor vMachine object
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{}:{}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            for disk in vm_object['disks']:
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                        if vdisk is None:
                            # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                            vdisk = VDisk()
                            vdisk.vpool = datastores[datastore].vpool
                            vdisk.reload_client()
                            vdisk.devicename = disk['filename']
                            vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                            vdisk.size = vdisk.info['volume_size']
                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            logger.info('Updating vMachine finished (name {}, {} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
Example No. 13
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info(
                        'Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'
                        .format(storage_driver.storagerouter.ip,
                                partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[
                                storage_driver.storagerouter] = str(
                                    partition.path)
                        except UnableToConnectException:
                            logger.warning(
                                'Gather Scrub - Storage Router {0:<15} is not reachable'
                                .format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info(
            'Gather Scrub - Checking {0} volumes for scrub work'.format(
                len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info(
                'Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'
                .format(storage_router.ip,
                        'local' if local is True else 'remote',
                        len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(
                    ScheduledTaskController._execute_scrub_work.s(
                        scrub_location=scrub_info[1],
                        vdisk_guids=vdisk_guids_to_scrub).apply_async(
                            routing_key='sr.{0}'.format(
                                storage_router.machine_id)))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(
                    scrub_location=local_scrub_location,
                    vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(local_storage_router.ip, ex))
        all_results = result_set.join(
            propagate=False
        )  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error(
                    'Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'
                    .format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
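The start_index/end_index arithmetic above carves the vdisk guids into contiguous, near-equal chunks, one per scrub location. A standalone sketch of the same scheme; '//' spells out the integer division that Python 2's '/' performs implicitly:

def split_work(guids, location_count):
    # Contiguous slices; the last chunk absorbs the remainder.
    return [guids[index * len(guids) // location_count:
                  (index + 1) * len(guids) // location_count]
            for index in range(location_count)]

assert split_work(list(range(10)), 3) == [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]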
Example No. 14
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is, for example, 0 and this script is at
        version 3, it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - The datastore is still empty, add defaults
        if working_version < 1:
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.backendtype import BackendType
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters +
                                                                  string.digits +
                                                                  '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                    for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [
                (admin_group, [read_role, write_role, manage_role]),
                (viewers_group, [read_role])
            ]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add backends
            for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                      ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in ['MetadataServer', 'AlbaProxy', 'Arakoon']:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Brandings
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

            # We're now at version 1
            working_version = 1

        # Version 2 introduced:
        # - new Descriptor format
        if working_version < 2:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                        filename = data[entry]['source']
                        if not filename.startswith('/'):
                            filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                        module = imp.load_source(data[entry]['name'], filename)
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            # We're now at version 2
            working_version = 2

        # Version 3 introduced:
        # - new Descriptor format
        if working_version < 3:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry]:
                        module = imp.load_source(data[entry]['name'], data[entry]['source'])
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            working_version = 3

        # Version 4 introduced:
        # - Flexible SSD layout
        if working_version < 4:
            import os
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.extensions.generic.remote import Remote
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
            for service in ServiceTypeList.get_by_name('MetadataServer').services:
                mds_service = service.mds_service
                storagedriver = None
                for current_storagedriver in service.storagerouter.storagedrivers:
                    if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                        storagedriver = current_storagedriver
                        break
                tasks = {}
                if storagedriver._data.get('mountpoint_md'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
                if storagedriver._data.get('mountpoint_temp'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
                for disk in service.storagerouter.disks:
                    for partition in disk.partitions:
                        for directory, (role, subrole) in tasks.iteritems():
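                            # Walk up from the target directory to the nearest existing
                            # path, then match its device number against the partition's
                            # inode to find the DiskPartition backing this mountpoint.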
                            with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                                stat_dir = directory
                                while not remote.os.path.exists(stat_dir) and stat_dir != '/':
                                    stat_dir = stat_dir.rsplit('/', 1)[0]
                                    if not stat_dir:
                                        stat_dir = '/'
                                inode = remote.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})
            for storagedriver in StorageDriverList.get_storagedrivers():
                migrated_objects = {}
                for disk in storagedriver.storagerouter.disks:
                    for partition in disk.partitions:
                        # Process all mountpoints that are unique and don't have a specified size
                        for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                                'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                     'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                     'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                     'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                            if key in storagedriver._data:
                                is_list = isinstance(storagedriver._data[key], list)
                                entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                                for entry in entries:
                                    if not entry:
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                    else:
                                        with Remote(storagedriver.storagerouter.ip, [os], username='******') as remote:
                                            inode = remote.os.stat(entry).st_dev
                                        if partition.inode == inode:
                                            if role not in partition.roles:
                                                partition.roles.append(role)
                                                partition.save()
                                            for folder, subrole in sr_info.iteritems():
                                                number = 0
                                                migrated = False
                                                for sd_partition in storagedriver.partitions:
                                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                        if sd_partition.partition_guid == partition.guid:
                                                            number = max(sd_partition.number, number)
                                                if migrated is False:
                                                    sd_partition = StorageDriverPartition()
                                                    sd_partition.role = role
                                                    sd_partition.sub_role = subrole
                                                    sd_partition.partition = partition
                                                    sd_partition.storagedriver = storagedriver
                                                    sd_partition.size = None
                                                    sd_partition.number = number + 1
                                                    sd_partition.save()
                                                    if folder:
                                                        source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                    else:
                                                        source = entry
                                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                                    path = sd_partition.path.rsplit('/', 1)[0]
                                                    if path:
                                                        client.dir_create(path)
                                                        client.dir_chown(path, 'ovs', 'ovs')
                                                    client.symlink({sd_partition.path: source})
                                                    migrated_objects[source] = sd_partition
                                            if is_list:
                                                storagedriver._data[key].remove(entry)
                                                if len(storagedriver._data[key]) == 0:
                                                    del storagedriver._data[key]
                                            else:
                                                del storagedriver._data[key]
                                            storagedriver.save()
                if 'mountpoint_bfs' in storagedriver._data:
                    storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                    if not storagedriver.mountpoint_dfs:
                        storagedriver.mountpoint_dfs = None
                    del storagedriver._data['mountpoint_bfs']
                    storagedriver.save()
                if 'mountpoint_temp' in storagedriver._data:
                    del storagedriver._data['mountpoint_temp']
                    storagedriver.save()
                if migrated_objects:
                    print 'Loading sizes'
                    config = StorageDriverConfiguration('storagedriver', storagedriver.vpool.name)
                    config.load(SSHClient(storagedriver.storagerouter, username='******'))
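                    # Sizes in the storagedriver config are strings like '1024KiB';
                    # strip the unit and convert to bytes before storing them.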
                    for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                        path = readcache.get('path', '').rsplit('/', 1)[0]
                        size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()
                    for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                        path = writecache.get('path', '')
                        size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size)
                            migrated_objects[path].save()

            working_version = 4

        return working_version
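
The sizes loaded at the end of this migration come from the storagedriver JSON configuration as strings such as '10240KiB'; the code turns them into bytes with a strip('KiB') plus a multiply. A standalone sketch of that conversion, assuming the same '<digits>KiB' input format (the helper name is ours):

def kib_string_to_bytes(size_string):
    # str.strip('KiB') removes the characters 'K', 'i' and 'B' from both
    # ends of the string, so '10240KiB' becomes '10240'; this only works
    # when everything between those characters is digits.
    if not size_string:
        return None
    return int(size_string.strip('KiB')) * 1024

assert kib_string_to_bytes('10240KiB') == 10485760  # 10 MiB in bytes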
Example No. 15
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            _ = SSHClient(storage_driver.storagerouter)
                            scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = ResultSet([])
        storage_router_list = []

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set.add(ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                             vdisk_guids=vdisk_guids_to_scrub).apply_async(
                    routing_key='sr.{0}'.format(storage_router.machine_id)
                ))
                storage_router_list.append(storage_router)

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))
        all_results = result_set.join(propagate=False)  # Propagate False makes sure all jobs are waited for even when 1 or more jobs fail
        for index, result in enumerate(all_results):
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(storage_router_list[index].ip, result))
        if len(processed_guids) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        logger.info('Gather Scrub - Finished')
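
The start_index/end_index arithmetic in gather_scrub_work above splits the vDisk guids into one contiguous, near-equal slice per scrub location. The same chunking in isolation, with the integer division written explicitly (function name and the print are ours):

def chunk_evenly(items, worker_count):
    # Worker i receives items[i * n // w:(i + 1) * n // w]: chunk sizes
    # differ by at most one element and the slices cover the whole list.
    items = list(items)
    n = len(items)
    return [items[i * n // worker_count:(i + 1) * n // worker_count]
            for i in range(worker_count)]

print(chunk_evenly(range(10), 3))  # [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]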
Example No. 16
    def migrate(previous_version):
        """
        Migrates from any version to any version, running all migrations required
        If previous_version is for example 0 and this script is at
        version 3 it will execute two steps:
          - 1 > 2
          - 2 > 3
        @param previous_version: The previous version from which to start the migration.
        """

        working_version = previous_version

        # Version 1 introduced:
        # - The datastore is still empty, add defaults
        if working_version < 1:
            from ovs.dal.hybrids.user import User
            from ovs.dal.hybrids.group import Group
            from ovs.dal.hybrids.role import Role
            from ovs.dal.hybrids.client import Client
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.hybrids.j_rolegroup import RoleGroup
            from ovs.dal.hybrids.j_roleclient import RoleClient
            from ovs.dal.hybrids.backendtype import BackendType
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.hybrids.branding import Branding
            from ovs.dal.lists.backendtypelist import BackendTypeList

            # Create groups
            admin_group = Group()
            admin_group.name = 'administrators'
            admin_group.description = 'Administrators'
            admin_group.save()
            viewers_group = Group()
            viewers_group.name = 'viewers'
            viewers_group.description = 'Viewers'
            viewers_group.save()

            # Create users
            admin = User()
            admin.username = '******'
            admin.password = hashlib.sha256('admin').hexdigest()
            admin.is_active = True
            admin.group = admin_group
            admin.save()

            # Create internal OAuth 2 clients
            admin_pw_client = Client()
            admin_pw_client.ovs_type = 'INTERNAL'
            admin_pw_client.grant_type = 'PASSWORD'
            admin_pw_client.user = admin
            admin_pw_client.save()
            admin_cc_client = Client()
            admin_cc_client.ovs_type = 'INTERNAL'
            admin_cc_client.grant_type = 'CLIENT_CREDENTIALS'
            admin_cc_client.client_secret = ''.join(random.choice(string.ascii_letters +
                                                                  string.digits +
                                                                  '|_=+*#@!/-[]{}<>.?,\'";:~')
                                                    for _ in range(128))
            admin_cc_client.user = admin
            admin_cc_client.save()

            # Create roles
            read_role = Role()
            read_role.code = 'read'
            read_role.name = 'Read'
            read_role.description = 'Can read objects'
            read_role.save()
            write_role = Role()
            write_role.code = 'write'
            write_role.name = 'Write'
            write_role.description = 'Can write objects'
            write_role.save()
            manage_role = Role()
            manage_role.code = 'manage'
            manage_role.name = 'Manage'
            manage_role.description = 'Can manage the system'
            manage_role.save()

            # Attach groups to roles
            mapping = [
                (admin_group, [read_role, write_role, manage_role]),
                (viewers_group, [read_role])
            ]
            for setting in mapping:
                for role in setting[1]:
                    rolegroup = RoleGroup()
                    rolegroup.group = setting[0]
                    rolegroup.role = role
                    rolegroup.save()
                for user in setting[0].users:
                    for role in setting[1]:
                        for client in user.clients:
                            roleclient = RoleClient()
                            roleclient.client = client
                            roleclient.role = role
                            roleclient.save()

            # Add backends
            for backend_type_info in [('Ceph', 'ceph_s3'), ('Amazon', 'amazon_s3'), ('Swift', 'swift_s3'),
                                      ('Local', 'local'), ('Distributed', 'distributed'), ('ALBA', 'alba')]:
                code = backend_type_info[1]
                backend_type = BackendTypeList.get_backend_type_by_code(code)
                if backend_type is None:
                    backend_type = BackendType()
                backend_type.name = backend_type_info[0]
                backend_type.code = code
                backend_type.save()

            # Add service types
            for service_type_info in [ServiceType.SERVICE_TYPES.MD_SERVER, ServiceType.SERVICE_TYPES.ALBA_PROXY, ServiceType.SERVICE_TYPES.ARAKOON]:
                service_type = ServiceType()
                service_type.name = service_type_info
                service_type.save()

            # Branding
            branding = Branding()
            branding.name = 'Default'
            branding.description = 'Default bootstrap theme'
            branding.css = 'bootstrap-default.min.css'
            branding.productname = 'Open vStorage'
            branding.is_default = True
            branding.save()
            slate = Branding()
            slate.name = 'Slate'
            slate.description = 'Dark bootstrap theme'
            slate.css = 'bootstrap-slate.min.css'
            slate.productname = 'Open vStorage'
            slate.is_default = False
            slate.save()

            # Failure Domain
            failure_domain = FailureDomain()
            failure_domain.name = 'Default'
            failure_domain.save()

            # We're now at version 1
            working_version = 1

        # Version 2 introduced:
        # - new Descriptor format
        if working_version < 2:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry] and 'hybrids' in data[entry]['source']:
                        filename = data[entry]['source']
                        if not filename.startswith('/'):
                            filename = '/opt/OpenvStorage/ovs/dal/{0}'.format(filename)
                        module = imp.load_source(data[entry]['name'], filename)
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            # We're now at version 2
            working_version = 2

        # Version 3 introduced:
        # - new Descriptor format
        if working_version < 3:
            import imp
            from ovs.dal.helpers import Descriptor
            from ovs.extensions.storage.persistentfactory import PersistentFactory

            client = PersistentFactory.get_client()
            keys = client.prefix('ovs_data')
            for key in keys:
                data = client.get(key)
                modified = False
                for entry in data.keys():
                    if isinstance(data[entry], dict) and 'source' in data[entry]:
                        module = imp.load_source(data[entry]['name'], data[entry]['source'])
                        cls = getattr(module, data[entry]['type'])
                        new_data = Descriptor(cls, cached=False).descriptor
                        if 'guid' in data[entry]:
                            new_data['guid'] = data[entry]['guid']
                        data[entry] = new_data
                        modified = True
                if modified is True:
                    data['_version'] += 1
                    client.set(key, data)

            working_version = 3

        # Version 4 introduced:
        # - Flexible SSD layout
        if working_version < 4:
            import os
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.hybrids.j_storagedriverpartition import StorageDriverPartition
            from ovs.dal.hybrids.servicetype import ServiceType
            from ovs.dal.lists.servicetypelist import ServiceTypeList
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
            for service in ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.MD_SERVER).services:
                mds_service = service.mds_service
                storagedriver = None
                for current_storagedriver in service.storagerouter.storagedrivers:
                    if current_storagedriver.vpool_guid == mds_service.vpool_guid:
                        storagedriver = current_storagedriver
                        break
                if storagedriver is None:
                    continue  # No StorageDriver for this vPool on this StorageRouter
                tasks = {}
                if storagedriver._data.get('mountpoint_md'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_md'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.DB, StorageDriverPartition.SUBROLE.MDS)
                if storagedriver._data.get('mountpoint_temp'):
                    tasks['{0}/mds_{1}_{2}'.format(storagedriver._data.get('mountpoint_temp'),
                                                   storagedriver.vpool.name,
                                                   mds_service.number)] = (DiskPartition.ROLES.SCRUB, StorageDriverPartition.SUBROLE.MDS)
                for disk in service.storagerouter.disks:
                    for partition in disk.partitions:
                        for directory, (role, subrole) in tasks.iteritems():
                            with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                stat_dir = directory
                                while not rem.os.path.exists(stat_dir) and stat_dir != '/':
                                    stat_dir = stat_dir.rsplit('/', 1)[0]
                                    if not stat_dir:
                                        stat_dir = '/'
                                inode = rem.os.stat(stat_dir).st_dev
                            if partition.inode == inode:
                                if role not in partition.roles:
                                    partition.roles.append(role)
                                    partition.save()
                                number = 0
                                migrated = False
                                for sd_partition in storagedriver.partitions:
                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                        if sd_partition.mds_service == mds_service:
                                            migrated = True
                                            break
                                        if sd_partition.partition_guid == partition.guid:
                                            number = max(sd_partition.number, number)
                                if migrated is False:
                                    sd_partition = StorageDriverPartition()
                                    sd_partition.role = role
                                    sd_partition.sub_role = subrole
                                    sd_partition.partition = partition
                                    sd_partition.storagedriver = storagedriver
                                    sd_partition.mds_service = mds_service
                                    sd_partition.size = None
                                    sd_partition.number = number + 1
                                    sd_partition.save()
                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                    path = sd_partition.path.rsplit('/', 1)[0]
                                    if path:
                                        client.dir_create(path)
                                        client.dir_chown(path, 'ovs', 'ovs')
                                    client.dir_create(directory)
                                    client.dir_chown(directory, 'ovs', 'ovs')
                                    client.symlink({sd_partition.path: directory})
            for storagedriver in StorageDriverList.get_storagedrivers():
                migrated_objects = {}
                for disk in storagedriver.storagerouter.disks:
                    for partition in disk.partitions:
                        # Process all mountpoints that are unique and don't have a specified size
                        for key, (role, sr_info) in {'mountpoint_md': (DiskPartition.ROLES.DB, {'metadata_{0}': StorageDriverPartition.SUBROLE.MD,
                                                                                                'tlogs_{0}': StorageDriverPartition.SUBROLE.TLOG}),
                                                     'mountpoint_fragmentcache': (DiskPartition.ROLES.WRITE, {'fcache_{0}': StorageDriverPartition.SUBROLE.FCACHE}),
                                                     'mountpoint_foc': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_dtl': (DiskPartition.ROLES.WRITE, {'fd_{0}': StorageDriverPartition.SUBROLE.FD,
                                                                                                    'dtl_{0}': StorageDriverPartition.SUBROLE.DTL}),
                                                     'mountpoint_readcaches': (DiskPartition.ROLES.READ, {'': None}),
                                                     'mountpoint_writecaches': (DiskPartition.ROLES.WRITE, {'sco_{0}': StorageDriverPartition.SUBROLE.SCO})}.iteritems():
                            if key in storagedriver._data:
                                is_list = isinstance(storagedriver._data[key], list)
                                entries = storagedriver._data[key][:] if is_list is True else [storagedriver._data[key]]
                                for entry in entries:
                                    if not entry:
                                        if is_list:
                                            storagedriver._data[key].remove(entry)
                                            if len(storagedriver._data[key]) == 0:
                                                del storagedriver._data[key]
                                        else:
                                            del storagedriver._data[key]
                                    else:
                                        with remote(storagedriver.storagerouter.ip, [os], username='******') as rem:
                                            inode = rem.os.stat(entry).st_dev
                                        if partition.inode == inode:
                                            if role not in partition.roles:
                                                partition.roles.append(role)
                                                partition.save()
                                            for folder, subrole in sr_info.iteritems():
                                                number = 0
                                                migrated = False
                                                for sd_partition in storagedriver.partitions:
                                                    if sd_partition.role == role and sd_partition.sub_role == subrole:
                                                        if sd_partition.partition_guid == partition.guid:
                                                            number = max(sd_partition.number, number)
                                                if migrated is False:
                                                    sd_partition = StorageDriverPartition()
                                                    sd_partition.role = role
                                                    sd_partition.sub_role = subrole
                                                    sd_partition.partition = partition
                                                    sd_partition.storagedriver = storagedriver
                                                    sd_partition.size = None
                                                    sd_partition.number = number + 1
                                                    sd_partition.save()
                                                    if folder:
                                                        source = '{0}/{1}'.format(entry, folder.format(storagedriver.vpool.name))
                                                    else:
                                                        source = entry
                                                    client = SSHClient(storagedriver.storagerouter, username='******')
                                                    path = sd_partition.path.rsplit('/', 1)[0]
                                                    if path:
                                                        client.dir_create(path)
                                                        client.dir_chown(path, 'ovs', 'ovs')
                                                    client.symlink({sd_partition.path: source})
                                                    migrated_objects[source] = sd_partition
                                            if is_list:
                                                storagedriver._data[key].remove(entry)
                                                if len(storagedriver._data[key]) == 0:
                                                    del storagedriver._data[key]
                                            else:
                                                del storagedriver._data[key]
                                            storagedriver.save()
                if 'mountpoint_bfs' in storagedriver._data:
                    storagedriver.mountpoint_dfs = storagedriver._data['mountpoint_bfs']
                    if not storagedriver.mountpoint_dfs:
                        storagedriver.mountpoint_dfs = None
                    del storagedriver._data['mountpoint_bfs']
                    storagedriver.save()
                if 'mountpoint_temp' in storagedriver._data:
                    del storagedriver._data['mountpoint_temp']
                    storagedriver.save()
                if migrated_objects:
                    print 'Loading sizes'
                    config = StorageDriverConfiguration('storagedriver', storagedriver.vpool_guid, storagedriver.storagedriver_id)
                    config.load()
                    for readcache in config.configuration.get('content_addressed_cache', {}).get('clustercache_mount_points', []):
                        path = readcache.get('path', '').rsplit('/', 1)[0]
                        size = int(readcache['size'].strip('KiB')) * 1024 if 'size' in readcache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size) if size is not None else None
                            migrated_objects[path].save()
                    for writecache in config.configuration.get('scocache', {}).get('scocache_mount_points', []):
                        path = writecache.get('path', '')
                        size = int(writecache['size'].strip('KiB')) * 1024 if 'size' in writecache else None
                        if path in migrated_objects:
                            migrated_objects[path].size = long(size) if size is not None else None
                            migrated_objects[path].save()

            working_version = 4

        # Version 5 introduced:
        # - Failure Domains
        if working_version < 5:
            import os
            from ovs.dal.hybrids.failuredomain import FailureDomain
            from ovs.dal.lists.failuredomainlist import FailureDomainList
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            from ovs.extensions.generic.remote import remote
            from ovs.extensions.generic.sshclient import SSHClient
            failure_domains = FailureDomainList.get_failure_domains()
            if len(failure_domains) > 0:
                failure_domain = failure_domains[0]
            else:
                failure_domain = FailureDomain()
                failure_domain.name = 'Default'
                failure_domain.save()
            for storagerouter in StorageRouterList.get_storagerouters():
                change = False
                if storagerouter.primary_failure_domain is None:
                    storagerouter.primary_failure_domain = failure_domain
                    change = True
                if storagerouter.rdma_capable is None:
                    client = SSHClient(storagerouter, username='******')
                    rdma_capable = False
                    with remote(client.ip, [os], username='******') as rem:
                        for root, dirs, files in rem.os.walk('/sys/class/infiniband'):
                            for directory in dirs:
                                ports_dir = '/'.join([root, directory, 'ports'])
                                if not rem.os.path.exists(ports_dir):
                                    continue
                                for sub_root, sub_dirs, _ in rem.os.walk(ports_dir):
                                    if sub_root != ports_dir:
                                        continue
                                    for sub_directory in sub_dirs:
                                        state_file = '/'.join([sub_root, sub_directory, 'state'])
                                        if rem.os.path.exists(state_file):
                                            if 'ACTIVE' in client.run('cat {0}'.format(state_file)):
                                                rdma_capable = True
                    storagerouter.rdma_capable = rdma_capable
                    change = True
                if change is True:
                    storagerouter.save()

            working_version = 5

        # Version 6 introduced:
        # - Distributed scrubbing
        if working_version < 6:
            from ovs.dal.hybrids.diskpartition import DiskPartition
            from ovs.dal.lists.storagedriverlist import StorageDriverList
            from ovs.extensions.generic.sshclient import SSHClient
            for storage_driver in StorageDriverList.get_storagedrivers():
                root_client = SSHClient(storage_driver.storagerouter, username='******')
                for partition in storage_driver.partitions:
                    if partition.role == DiskPartition.ROLES.SCRUB:
                        old_path = partition.path
                        partition.sub_role = None
                        partition.save()
                        partition.invalidate_dynamics(['folder', 'path'])
                        if root_client.dir_exists(partition.path):
                            continue  # New directory already exists
                        if '_mds_' in old_path:
                            if root_client.dir_exists(old_path):
                                root_client.symlink({partition.path: old_path})
                        if not root_client.dir_exists(partition.path):
                            root_client.dir_create(partition.path)
                        root_client.dir_chmod(partition.path, 0777)

            working_version = 6

        # Version 7 introduced:
        # - vPool status
        if working_version < 7:
            from ovs.dal.hybrids import vpool
            reload(vpool)
            from ovs.dal.hybrids.vpool import VPool
            from ovs.dal.lists.vpoollist import VPoolList
            for _vpool in VPoolList.get_vpools():
                vpool = VPool(_vpool.guid)
                if hasattr(vpool, 'status') and vpool.status is None:
                    vpool.status = VPool.STATUSES.RUNNING
                    vpool.save()

            working_version = 7

        # Version 10 introduced:
        # - Reverse indexes are stored in persistent store
        # - Store more non-changing metadata on disk iso using a dynamic property
        if working_version < 10:
            from ovs.dal.helpers import HybridRunner, Descriptor
            from ovs.dal.datalist import DataList
            from ovs.extensions.storage.persistentfactory import PersistentFactory
            from ovs.extensions.storage.volatilefactory import VolatileFactory
            persistent = PersistentFactory.get_client()
            for prefix in ['ovs_listcache', 'ovs_reverseindex']:
                for key in persistent.prefix(prefix):
                    persistent.delete(key)
            for key in persistent.prefix('ovs_data_'):
                persistent.set(key, persistent.get(key))
            base_reverse_key = 'ovs_reverseindex_{0}_{1}|{2}|{3}'
            hybrid_structure = HybridRunner.get_hybrids()
            for class_descriptor in hybrid_structure.values():
                cls = Descriptor().load(class_descriptor).get_object()
                all_objects = DataList(cls, {'type': DataList.where_operator.AND,
                                             'items': []})
                for item in all_objects:
                    guid = item.guid
                    for relation in item._relations:
                        if relation.foreign_type is None:
                            rcls = cls
                            rclsname = rcls.__name__.lower()
                        else:
                            rcls = relation.foreign_type
                            rclsname = rcls.__name__.lower()
                        key = relation.name
                        rguid = item._data[key]['guid']
                        if rguid is not None:
                            reverse_key = base_reverse_key.format(rclsname, rguid, relation.foreign_key, guid)
                            persistent.set(reverse_key, 0)
            volatile = VolatileFactory.get_client()
            try:
                volatile._client.flush_all()
            except:
                pass
            from ovs.dal.lists.vdisklist import VDiskList
            for vdisk in VDiskList.get_vdisks():
                try:
                    vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                      'cluster_multiplier': vdisk.info['cluster_multiplier']}
                    vdisk.save()
                except:
                    pass

            working_version = 10

        # Version 11 introduced:
        # - ALBA accelerated ALBA, meaning different vpool.metadata information
        if working_version < 11:
            from ovs.dal.lists.vpoollist import VPoolList

            for vpool in VPoolList.get_vpools():
                vpool.metadata = {'backend': vpool.metadata}
                if 'metadata' in vpool.metadata['backend']:
                    vpool.metadata['backend']['arakoon_config'] = vpool.metadata['backend'].pop('metadata')
                if 'backend_info' in vpool.metadata['backend']:
                    vpool.metadata['backend']['backend_info']['fragment_cache_on_read'] = True
                    vpool.metadata['backend']['backend_info']['fragment_cache_on_write'] = False
                vpool.save()
            working_version = 11

        return working_version
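
The migrate() above follows a ladder pattern: each block guards on working_version < N, performs its (ideally idempotent) data conversion, and then bumps working_version to N, so a datastore at any older version replays exactly the steps it is missing. A stripped-down sketch of that skeleton, with placeholder bodies:

def migrate(previous_version):
    working_version = previous_version
    if working_version < 1:
        # seed defaults (users, groups, roles, ...)
        working_version = 1
    if working_version < 2:
        # rewrite stored Descriptors to the new format
        working_version = 2
    # ... one guarded block per schema version ...
    return working_version

Because every block runs in order within a single call, skipping version numbers (as the real code does between 7 and 10) is harmless: the guard simply fires for the next implemented step.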
Example No. 17
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with a given vMachine configuration
        :param vmachine: Virtual Machine to update
        :param vm_object: New virtual machine info
        :param pmachine: Physical machine of the virtual machine
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
            for disk in vm_object['disks']:
                ensure_safety = False
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        try:
                            mutex.acquire(wait=10)
                            vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                            if vdisk is None:
                                # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                                vdisk = VDisk()
                                vdisk.vpool = datastores[datastore].vpool
                                vdisk.reload_client()
                                vdisk.devicename = disk['filename']
                                vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                                vdisk.size = vdisk.info['volume_size']
                                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                                # Create the disk in a locked context, but don't execute long running-task in same context
                                vdisk.save()
                                ensure_safety = True
                        finally:
                            mutex.release()
                        if ensure_safety:
                            MDSServiceController.ensure_safety(vdisk)
                            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
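
update_vmachine_config wraps only the lookup-or-create of a vDisk in the volatile mutex and defers the long-running ensure_safety/DTL checkup until after the lock is released. That shape, reduced to its essentials (the lock, lookup and create callables are stand-ins; acquire(wait=10) mirrors the mutex call above):

def locked_get_or_create(lock, lookup, create):
    # Keep the critical section short: only the existence check and the
    # save run under the lock; slow follow-up work happens after release,
    # driven by the 'created' flag.
    created = False
    lock.acquire(wait=10)
    try:
        obj = lookup()
        if obj is None:
            obj = create()  # builds and saves the new object
            created = True
    finally:
        lock.release()
    return obj, created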
Example No. 18
    def verify_vdisk_cache_quota():
        """
        Validates whether the caching quota is reaching its limits or has surpassed it
        Each vDisk can consume a part of the total fragment caching capacity
        """
        MonitoringController._logger.info('Starting vDisk caching quota verification...')
        alba_guid_size_map = {}
        for storagedriver in StorageDriverList.get_storagedrivers():
            storagedriver.invalidate_dynamics(['vpool_backend_info', 'vdisks_guids'])

            for cache_type in [['cache_quota_fc', 'backend_info', 'connection_info', 'fragment'],
                               ['cache_quota_bc', 'block_cache_backend_info', 'block_cache_connection_info', 'block']]:
                cache_quota = storagedriver.vpool_backend_info[cache_type[0]]
                backend_info = storagedriver.vpool_backend_info[cache_type[1]]
                connection_info = storagedriver.vpool_backend_info[cache_type[2]]
                if backend_info is None or connection_info is None:
                    continue

                alba_backend_name = backend_info['name']
                alba_backend_host = connection_info['host']
                alba_backend_guid = backend_info['alba_backend_guid']
                if alba_backend_guid not in alba_guid_size_map:
                    ovs_client = OVSClient(ip=alba_backend_host,
                                           port=connection_info['port'],
                                           credentials=(connection_info['client_id'], connection_info['client_secret']),
                                           version=2,
                                           cache_store=VolatileFactory.get_client())
                    try:
                        usage = ovs_client.get('/alba/backends/{0}/'.format(alba_backend_guid), params={'contents': 'usages'})
                        alba_guid_size_map[alba_backend_guid] = {'name': alba_backend_name,
                                                                 'backend_ip': alba_backend_host,
                                                                 'total_size': usage['usages']['size'],
                                                                 'requested_size': 0}
                    except Exception:
                        MonitoringController._logger.exception('Failed to retrieve ALBA Backend info for {0} on host {1}'.format(alba_backend_name, alba_backend_host))
                        continue

                for vdisk_guid in storagedriver.vdisks_guids:
                    vdisk = VDisk(vdisk_guid)
                    vdisk_cq = vdisk.cache_quota.get(cache_type[3]) if vdisk.cache_quota is not None else None
                    if vdisk_cq is None:
                        alba_guid_size_map[alba_backend_guid]['requested_size'] += cache_quota if cache_quota is not None else 0
                    else:
                        alba_guid_size_map[alba_backend_guid]['requested_size'] += vdisk_cq

        local_ips = [sr.ip for sr in StorageRouterList.get_storagerouters()]
        for alba_backend_info in alba_guid_size_map.itervalues():
            name = alba_backend_info['name']
            backend_ip = alba_backend_info['backend_ip']

            location = 'local'
            remote_msg = ''
            if backend_ip not in local_ips:
                location = 'remote'
                remote_msg = ' (on remote IP {0})'.format(backend_ip)

            percentage = float(alba_backend_info['requested_size']) / alba_backend_info['total_size'] * 100
            if percentage > 100:
                MonitoringController._logger.error('OVS_WARNING: Over-allocation for vDisk caching quota on {0} ALBA Backend {1}{2}. Unexpected behavior might occur'.format(location, name, remote_msg))
            elif percentage > 70:
                MonitoringController._logger.warning('OVS_WARNING: vDisk caching quota on {0} ALBA Backend {1} is at {2:.1f}%{3}'.format(location, name, percentage, remote_msg))
        MonitoringController._logger.info('Finished vDisk cache quota verification')
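
The verification reduces to a sum per ALBA Backend: every vDisk contributes its own cache quota when set, otherwise the vPool-wide default, and the total is compared against the backend's size. A toy version of that calculation under the same fallback rules (names are ours):

def quota_usage_pct(vdisk_quotas, default_quota, total_size):
    # Each entry is a per-vDisk override or None; None falls back to the
    # vPool default, and a missing default counts as 0.
    requested = sum(q if q is not None else (default_quota or 0)
                    for q in vdisk_quotas)
    return float(requested) / total_size * 100

# Three vDisks, one 30 GiB override, a 10 GiB default, a 100 GiB backend:
print(quota_usage_pct([None, 30, None], 10, 100))  # 50.0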
Example No. 19
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                       timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                                      vdisk_guids=vdisk_guids_to_scrub).apply_async(
                                                                                                      routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                           vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error(
                            'Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
                else:
                    ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set,
                                                                            timesleep=60)  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
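
Compared to Example No. 15, this version re-queues the guids of any failed node on a surviving remote node (or runs them locally as a last resort), which is why duplicates can show up in processed_guids. The closing check therefore compares deduplicated results against the full set, and is equivalent to this sketch:

def assert_all_scrubbed(vdisk_guids, processed_guids):
    # vdisk_guids is a set; rescheduling may process a guid twice, so
    # compare deduplicated results and fail the run on any difference.
    if set(processed_guids) != set(vdisk_guids):
        raise RuntimeError('Scrubbing failed for 1 or more storagerouters')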
Example No. 20
 def list(self):
     """
     Overview of all StorageDrivers
     """
     return StorageDriverList.get_storagedrivers()
Example No. 21
    def add_vpool(parameters):
        """
        Add a vPool to the machine this task is running on
        """

        parameters = {} if parameters is None else parameters
        ip = parameters['storagerouter_ip']
        vpool_name = parameters['vpool_name']

        if StorageRouterController._validate_ip(ip) is False:
            raise ValueError('The entered ip address is invalid')

        if not re.match(r'^[0-9a-z]+(\-+[0-9a-z]+)*$', vpool_name):
            raise ValueError('Invalid vpool_name given')

        client = SSHClient.load(ip)  # Make sure to ALWAYS reload the client, as Fabric seems to be singleton-ish
        unique_id = System.get_my_machine_id(client)

        storagerouter = None
        for current_storagerouter in StorageRouterList.get_storagerouters():
            if current_storagerouter.ip == ip and current_storagerouter.machine_id == unique_id:
                storagerouter = current_storagerouter
                break
        if storagerouter is None:
            raise RuntimeError('Could not find Storage Router with given ip address')

        vpool = VPoolList.get_vpool_by_name(vpool_name)
        storagedriver = None
        if vpool is not None:
            if vpool.backend_type.code == 'local':
                # Might be an issue; investigate whether it's on the same node or not
                if len(vpool.storagedrivers) == 1 and vpool.storagedrivers[0].storagerouter.machine_id != unique_id:
                    raise RuntimeError('A local vPool with name {0} already exists'.format(vpool_name))
            for vpool_storagedriver in vpool.storagedrivers:
                if vpool_storagedriver.storagerouter_guid == storagerouter.guid:
                    storagedriver = vpool_storagedriver  # The vPool is already added to this Storage Router and this might be a cleanup/recovery

            # Check whether there are running machines on this vPool
            machine_guids = []
            for vdisk in vpool.vdisks:
                if vdisk.vmachine_guid is None or vdisk.vmachine_guid in machine_guids:
                    continue
                machine_guids.append(vdisk.vmachine_guid)
                if vdisk.vmachine.hypervisor_status in ['RUNNING', 'PAUSED']:
                    raise RuntimeError(
                        'At least one vMachine using this vPool is still running or paused. Make sure there are no active vMachines'
                    )

        nodes = {ip}  # Set literal
        if vpool is not None:
            for vpool_storagedriver in vpool.storagedrivers:
                nodes.add(vpool_storagedriver.storagerouter.ip)
        nodes = list(nodes)

        services = ['volumedriver_{0}'.format(vpool_name),
                    'failovercache_{0}'.format(vpool_name)]

        # Stop services
        for node in nodes:
            node_client = SSHClient.load(node)
            for service in services:
                System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.disable_service('{0}')
""".format(service))
                System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
if Service.has_service('{0}'):
    Service.stop_service('{0}')
""".format(service))

        # Keep in mind that if the Storage Driver exists, the vPool does as well
        client = SSHClient.load(ip)
        mountpoint_bfs = ''
        directories_to_create = []

        if vpool is None:
            vpool = VPool()
            supported_backends = System.read_remote_config(client, 'volumedriver.supported.backends').split(',')
            if 'rest' in supported_backends:
                supported_backends.remove('rest')  # REST is not supported for now
            backend_type = BackendTypeList.get_backend_type_by_code(parameters['type'])
            vpool.backend_type = backend_type
            connection_host = connection_port = connection_username = connection_password = None
            if vpool.backend_type.code in ['local', 'distributed']:
                vpool.metadata = {'backend_type': 'LOCAL'}
                mountpoint_bfs = parameters['mountpoint_bfs']
                directories_to_create.append(mountpoint_bfs)
                vpool.metadata['local_connection_path'] = mountpoint_bfs
            if vpool.backend_type.code == 'rest':
                connection_host = parameters['connection_host']
                connection_port = parameters['connection_port']
                rest_connection_timeout_secs = parameters['connection_timeout']
                vpool.metadata = {'rest_connection_host': connection_host,
                                  'rest_connection_port': connection_port,
                                  'buchla_connection_log_level': "0",
                                  'rest_connection_verbose_logging': rest_connection_timeout_secs,
                                  'rest_connection_metadata_format': "JSON",
                                  'backend_type': 'REST'}
            elif vpool.backend_type.code in ('ceph_s3', 'amazon_s3', 'swift_s3'):
                connection_host = parameters['connection_host']
                connection_port = parameters['connection_port']
                connection_username = parameters['connection_username']
                connection_password = parameters['connection_password']
                if vpool.backend_type.code in ['swift_s3']:
                    strict_consistency = 'false'
                    s3_connection_flavour = 'SWIFT'
                else:
                    strict_consistency = 'true'
                    s3_connection_flavour = 'S3'

                vpool.metadata = {'s3_connection_host': connection_host,
                                  's3_connection_port': connection_port,
                                  's3_connection_username': connection_username,
                                  's3_connection_password': connection_password,
                                  's3_connection_flavour': s3_connection_flavour,
                                  's3_connection_strict_consistency': strict_consistency,
                                  's3_connection_verbose_logging': 1,
                                  'backend_type': 'S3'}

            vpool.name = vpool_name
            vpool.description = "{} {}".format(vpool.backend_type.code, vpool_name)
            vpool.login = connection_username
            vpool.password = connection_password
            if not connection_host:
                vpool.connection = None
            else:
                vpool.connection = '{}:{}'.format(connection_host, connection_port)
            vpool.save()

        # Connection information is Storage Driver related information
        new_storagedriver = False
        if storagedriver is None:
            storagedriver = StorageDriver()
            new_storagedriver = True

        mountpoint_temp = parameters['mountpoint_temp']
        mountpoint_md = parameters['mountpoint_md']
        mountpoint_readcache1 = parameters['mountpoint_readcache1']
        mountpoint_readcache2 = parameters.get('mountpoint_readcache2', '')
        mountpoint_writecache = parameters['mountpoint_writecache']
        mountpoint_foc = parameters['mountpoint_foc']

        directories_to_create.append(mountpoint_temp)
        directories_to_create.append(mountpoint_md)
        directories_to_create.append(mountpoint_readcache1)
        if mountpoint_readcache2:
            directories_to_create.append(mountpoint_readcache2)
        directories_to_create.append(mountpoint_writecache)
        directories_to_create.append(mountpoint_foc)

        client = SSHClient.load(ip)
        dir_create_script = """
import os
for directory in {0}:
    if not os.path.exists(directory):
        os.makedirs(directory)
""".format(directories_to_create)
        System.exec_remote_python(client, dir_create_script)

        read_cache1_fs = os.statvfs(mountpoint_readcache1)
        read_cache2_fs = None
        if mountpoint_readcache2:
            read_cache2_fs = os.statvfs(mountpoint_readcache2)
        write_cache_fs = os.statvfs(mountpoint_writecache)
        fdcache = '{}/fd_{}'.format(mountpoint_writecache, vpool_name)
        scocache = '{}/sco_{}'.format(mountpoint_writecache, vpool_name)
        readcache1 = '{}/read1_{}'.format(mountpoint_readcache1, vpool_name)
        files2create = [readcache1]
        if mountpoint_readcache2 and mountpoint_readcache1 != mountpoint_readcache2:
            readcache2 = '{}/read2_{}'.format(mountpoint_readcache2, vpool_name)
            files2create.append(readcache2)
        else:
            readcache2 = ''
        failovercache = '{}/foc_{}'.format(mountpoint_foc, vpool_name)
        metadatapath = '{}/metadata_{}'.format(mountpoint_md, vpool_name)
        tlogpath = '{}/tlogs_{}'.format(mountpoint_md, vpool_name)
        rsppath = '/var/rsp/{}'.format(vpool_name)
        dirs2create = [scocache, failovercache, metadatapath, tlogpath, rsppath,
                       System.read_remote_config(client, 'volumedriver.readcache.serialization.path')]

        cmd = "cat /etc/mtab | grep ^/dev/ | cut -d ' ' -f 2"
        mountpoints = [device.strip() for device in client.run(cmd).strip().split('\n')]
        mountpoints.remove('/')

        def is_partition(directory):
            for mountpoint in mountpoints:
                if directory == mountpoint:
                    return True
            return False
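        # Illustrative: if /etc/mtab lists '/dev/sdb1 /mnt/ssd1 ...' (hypothetical
        # device and path), mountpoints contains '/mnt/ssd1', so
        # is_partition('/mnt/ssd1') is True, while a plain subdirectory such as
        # '/mnt/ssd1/cache' yields False.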
        # Cache sizes
        # 20% = scocache
        # 20% = failovercache (@TODO: check if this can possibly consume more than 20%)
        # 60% = readcache

        # safety values:
        readcache1_factor = 0.2
        readcache2_factor = 0.2
        writecache_factor = 0.1

        if (mountpoint_readcache1 == mountpoint_readcache2) or not mountpoint_readcache2:
            delta = set()
            delta.add(mountpoint_readcache1 if is_partition(mountpoint_readcache1) else '/dummy')
            delta.add(mountpoint_writecache if is_partition(mountpoint_writecache) else '/dummy')
            delta.add(mountpoint_foc if is_partition(mountpoint_foc) else '/dummy')
            if len(delta) == 1:
                readcache1_factor = 0.49
                writecache_factor = 0.2
            elif len(delta) == 2:
                if mountpoint_writecache == mountpoint_foc:
                    readcache1_factor = 0.98
                    writecache_factor = 0.49
                else:
                    readcache1_factor = 0.49
                    if mountpoint_readcache1 == mountpoint_writecache:
                        writecache_factor = 0.49
                    else:
                        writecache_factor = 0.98
            elif len(delta) == 3:
                readcache1_factor = 0.98
                writecache_factor = 0.98
        else:
            delta = set()
            delta.add(mountpoint_readcache1 if is_partition(mountpoint_readcache1) else '/dummy')
            delta.add(mountpoint_readcache2 if is_partition(mountpoint_readcache2) else '/dummy')
            delta.add(mountpoint_writecache if is_partition(mountpoint_writecache) else '/dummy')
            delta.add(mountpoint_foc if is_partition(mountpoint_foc) else '/dummy')
            if len(delta) == 1:
                # consider them all to be directories
                readcache1_factor = 0.24
                readcache2_factor = 0.24
                writecache_factor = 0.24
            elif len(delta) == 2:
                if mountpoint_writecache == mountpoint_foc:
                    writecache_factor = 0.24
                    if mountpoint_readcache1 == mountpoint_writecache:
                        readcache1_factor = 0.49
                        readcache2_factor = 0.98
                    else:
                        readcache1_factor = 0.98
                        readcache2_factor = 0.49
                else:
                    readcache1_factor = readcache2_factor = 0.49
                    writecache_factor = 0.49
            elif len(delta) == 3:
                if mountpoint_writecache == mountpoint_foc:
                    readcache1_factor = 0.98
                    readcache2_factor = 0.98
                    writecache_factor = 0.49
                elif mountpoint_readcache1 == mountpoint_writecache:
                    readcache1_factor = 0.49
                    readcache2_factor = 0.98
                    writecache_factor = 0.49
                elif mountpoint_readcache1 == mountpoint_foc:
                    readcache1_factor = 0.49
                    readcache2_factor = 0.98
                    writecache_factor = 0.98
                elif mountpoint_readcache2 == mountpoint_writecache:
                    readcache1_factor = 0.98
                    readcache2_factor = 0.49
                    writecache_factor = 0.49
                elif mountpoint_readcache2 == mountpoint_foc:
                    readcache1_factor = 0.98
                    readcache2_factor = 0.49
                    writecache_factor = 0.98
            elif len(delta) == 4:
                readcache1_factor = 0.98
                readcache2_factor = 0.98
                writecache_factor = 0.98

        # summarize caching on root partition (directory only)
        root_assigned = dict()
        if not is_partition(mountpoint_readcache1):
            root_assigned['readcache1_factor'] = readcache1_factor
        if mountpoint_readcache2 and not is_partition(mountpoint_readcache2):
            root_assigned['readcache2_factor'] = readcache2_factor
        if not is_partition(mountpoint_writecache):
            root_assigned['writecache_factor'] = writecache_factor
        if not is_partition(mountpoint_foc):
            root_assigned['foc_factor'] = min(readcache1_factor, readcache2_factor, writecache_factor)

        # Always leave at least 20% of free space: add a small safety margin (2%)
        # per cache assigned to the root partition, then halve the factors for
        # every ~80% of over-commitment
        division_factor = 1.0
        total_size = sum(root_assigned.values()) + .02 * len(root_assigned)
        if 0.8 <= total_size < 1.6:
            division_factor = 2.0
        elif 1.6 <= total_size < 3.2:
            division_factor = 4.0
        elif total_size >= 3.2:
            division_factor = 8.0
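        # Worked example (hypothetical layout): with read cache 1 and the write
        # cache both plain directories on the root partition at 0.98 each,
        # total_size = 0.98 + 0.98 + 2 * 0.02 = 2.0, so division_factor becomes 4.0
        # and each factor drops to 0.245, keeping the root partition below ~50%.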

        if 'readcache1_factor' in root_assigned:
            readcache1_factor /= division_factor
        if 'readcache2_factor' in root_assigned:
            readcache2_factor /= division_factor
        if 'writecache_factor' in root_assigned:
            writecache_factor /= division_factor

        scocache_size = '{0}KiB'.format((int(write_cache_fs.f_bavail * writecache_factor / 4096) * 4096) * 4)
        if (mountpoint_readcache1 and not mountpoint_readcache2) or (mountpoint_readcache1 == mountpoint_readcache2):
            mountpoint_readcache2 = ''
            readcache1_size = '{0}KiB'.format((int(read_cache1_fs.f_bavail * readcache1_factor / 4096) * 4096) * 4)
            readcache2 = ''
            readcache2_size = '0KiB'
        else:
            readcache1_size = '{0}KiB'.format((int(read_cache1_fs.f_bavail * readcache1_factor / 4096) * 4096) * 4)
            readcache2_size = '{0}KiB'.format((int(read_cache2_fs.f_bavail * readcache2_factor / 4096) * 4096) * 4)
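        # Size formula, assuming the usual 4 KiB statvfs block size: take the free
        # block count (f_bavail), apply the cache factor, round down to a multiple
        # of 4096 blocks and convert blocks to KiB (* 4). E.g. with made-up numbers:
        # 1000000 blocks * 0.49 -> 490000 -> 487424 blocks -> '1949696KiB' (~1.86 GiB).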
        if new_storagedriver:
            ports_in_use = System.ports_in_use(client)
            ports_reserved = []
            ports_in_use_model = {}
            for port_storagedriver in StorageDriverList.get_storagedrivers():
                if port_storagedriver.vpool_guid not in ports_in_use_model:
                    ports_in_use_model[port_storagedriver.vpool_guid] = port_storagedriver.ports
                    ports_reserved += port_storagedriver.ports
            if vpool.guid in ports_in_use_model:  # The vPool is extended to another StorageRouter. We need to use these ports.
                ports = ports_in_use_model[vpool.guid]
                if any(port in ports_in_use for port in ports):
                    raise RuntimeError('The required ports are in use')
            else:  # First StorageDriver for this vPool, so generating new ports
                ports = []
                for port_range in System.read_remote_config(client, 'volumedriver.filesystem.ports').split(','):
                    port_range = port_range.strip()
                    if '-' in port_range:
                        current_range = (int(port_range.split('-')[0]), int(port_range.split('-')[1]))
                    else:
                        current_range = (int(port_range), 65535)
                    current_port = current_range[0]
                    while len(ports) < 3:
                        if current_port not in ports_in_use and current_port not in ports_reserved:
                            ports.append(current_port)
                        current_port += 1
                        if current_port > current_range[1]:
                            break
                if len(ports) != 3:
                    raise RuntimeError('Could not find enough free ports')
        else:
            ports = storagedriver.ports
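        # Note on port selection: 'volumedriver.filesystem.ports' is expected to
        # hold comma-separated entries such as '12322-12332' (range) or '12350'
        # (open-ended start); the values here are illustrative. Three free ports are
        # claimed per StorageDriver; judging by the ClusterNodeConfig call below,
        # they serve as the message, XML-RPC and failovercache ports.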

        ip_path = Configuration.get('ovs.core.ip.path')
        if ip_path is None:
            ip_path = "`which ip`"
        cmd = "{0} a | grep 'inet ' | sed 's/\s\s*/ /g' | cut -d ' ' -f 3 | cut -d '/' -f 1".format(ip_path)
        ipaddresses = client.run(cmd).strip().split('\n')
        ipaddresses = [ipaddr.strip() for ipaddr in ipaddresses]
        grid_ip = System.read_remote_config(client, 'ovs.grid.ip')
        if grid_ip in ipaddresses:
            ipaddresses.remove(grid_ip)
        if not ipaddresses:
            raise RuntimeError('No available ip addresses found suitable for Storage Router storage ip')
        if storagerouter.pmachine.hvtype == 'KVM':
            volumedriver_storageip = '127.0.0.1'
        else:
            volumedriver_storageip = parameters['storage_ip']
        vrouter_id = '{0}{1}'.format(vpool_name, unique_id)

        vrouter_config = {'vrouter_id': vrouter_id,
                          'vrouter_redirect_timeout_ms': '5000',
                          'vrouter_routing_retries': 10,
                          'vrouter_volume_read_threshold': 1024,
                          'vrouter_volume_write_threshold': 1024,
                          'vrouter_file_read_threshold': 1024,
                          'vrouter_file_write_threshold': 1024,
                          'vrouter_min_workers': 4,
                          'vrouter_max_workers': 16}
        voldrv_arakoon_cluster_id = str(System.read_remote_config(client, 'volumedriver.arakoon.clusterid'))
        voldrv_arakoon_cluster = ArakoonManagementEx().getCluster(voldrv_arakoon_cluster_id)
        voldrv_arakoon_client_config = voldrv_arakoon_cluster.getClientConfig()
        arakoon_node_configs = []
        for arakoon_node in voldrv_arakoon_client_config.keys():
            arakoon_node_configs.append(ArakoonNodeConfig(arakoon_node,
                                                          voldrv_arakoon_client_config[arakoon_node][0][0],
                                                          voldrv_arakoon_client_config[arakoon_node][1]))
        vrouter_clusterregistry = ClusterRegistry(str(vpool.guid), voldrv_arakoon_cluster_id, arakoon_node_configs)
        node_configs = []
        for existing_storagedriver in StorageDriverList.get_storagedrivers():
            if existing_storagedriver.vpool_guid == vpool.guid:
                node_configs.append(ClusterNodeConfig(str(existing_storagedriver.storagedriver_id),
                                                      str(existing_storagedriver.cluster_ip),
                                                      existing_storagedriver.ports[0],
                                                      existing_storagedriver.ports[1],
                                                      existing_storagedriver.ports[2]))
        if new_storagedriver:
            node_configs.append(ClusterNodeConfig(vrouter_id, grid_ip, ports[0], ports[1], ports[2]))
        vrouter_clusterregistry.set_node_configs(node_configs)
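        # The Arakoon-backed cluster registry now lists one node config per
        # StorageDriver of this vPool (including the one being added), which lets
        # the volumedrivers of the vPool discover each other's IP and ports.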
        readcaches = [{'path': readcache1, 'size': readcache1_size}]
        if readcache2:
            readcaches.append({'path': readcache2, 'size': readcache2_size})
        scocaches = [{'path': scocache, 'size': scocache_size}]
        filesystem_config = {'fs_backend_path': mountpoint_bfs}
        volumemanager_config = {'metadata_path': metadatapath, 'tlog_path': tlogpath}
        storagedriver_config_script = """
from ovs.plugin.provider.configuration import Configuration
from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration

fd_config = {{'fd_cache_path': '{11}',
              'fd_extent_cache_capacity': '1024',
              'fd_namespace' : 'fd-{0}-{12}'}}
storagedriver_configuration = StorageDriverConfiguration('{0}')
storagedriver_configuration.configure_backend({1})
storagedriver_configuration.configure_readcache({2}, Configuration.get('volumedriver.readcache.serialization.path') + '/{0}')
storagedriver_configuration.configure_scocache({3}, '1GB', '2GB')
storagedriver_configuration.configure_failovercache('{4}')
storagedriver_configuration.configure_filesystem({5})
storagedriver_configuration.configure_volumemanager({6})
storagedriver_configuration.configure_volumerouter('{12}', {7})
storagedriver_configuration.configure_arakoon_cluster('{8}', {9})
storagedriver_configuration.configure_hypervisor('{10}')
storagedriver_configuration.configure_filedriver(fd_config)
""".format(vpool_name, vpool.metadata, readcaches, scocaches, failovercache, filesystem_config,
           volumemanager_config, vrouter_config, voldrv_arakoon_cluster_id, voldrv_arakoon_client_config,
           storagerouter.pmachine.hvtype, fdcache, vpool.guid)
        System.exec_remote_python(client, storagedriver_config_script)
        remote_script = """
import os
from configobj import ConfigObj
from ovs.plugin.provider.configuration import Configuration
protocol = Configuration.get('ovs.core.broker.protocol')
login = Configuration.get('ovs.core.broker.login')
password = Configuration.get('ovs.core.broker.password')
vpool_name = {0}
uris = []
cfg = ConfigObj('/opt/OpenvStorage/config/rabbitmqclient.cfg')
main_section = cfg.get('main')
nodes = main_section['nodes'] if type(main_section['nodes']) == list else [main_section['nodes']]
for node in nodes:
    uris.append({{'amqp_uri': '{{0}}://{{1}}:{{2}}@{{3}}'.format(protocol, login, password, cfg.get(node)['location'])}})
from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
queue_config = {{'events_amqp_routing_key': Configuration.get('ovs.core.broker.volumerouter.queue'),
                 'events_amqp_uris': uris}}
for config_file in os.listdir('/opt/OpenvStorage/config/voldrv_vpools'):
    this_vpool_name = config_file.replace('.json', '')
    if config_file.endswith('.json') and (vpool_name is None or vpool_name == this_vpool_name):
        storagedriver_configuration = StorageDriverConfiguration(this_vpool_name)
        storagedriver_configuration.configure_event_publisher(queue_config)
""".format(vpool_name if vpool_name is None else "'{0}'".format(vpool_name))
        System.exec_remote_python(client, remote_script)

        # Updating the model
        storagedriver.storagedriver_id = vrouter_id
        storagedriver.name = vrouter_id.replace('_', ' ')
        storagedriver.description = storagedriver.name
        storagedriver.storage_ip = volumedriver_storageip
        storagedriver.cluster_ip = grid_ip
        storagedriver.ports = ports
        storagedriver.mountpoint = '/mnt/{0}'.format(vpool_name)
        storagedriver.mountpoint_temp = mountpoint_temp
        storagedriver.mountpoint_readcache1 = mountpoint_readcache1
        storagedriver.mountpoint_readcache2 = mountpoint_readcache2
        storagedriver.mountpoint_writecache = mountpoint_writecache
        storagedriver.mountpoint_foc = mountpoint_foc
        storagedriver.mountpoint_bfs = mountpoint_bfs
        storagedriver.mountpoint_md = mountpoint_md
        storagedriver.storagerouter = storagerouter
        storagedriver.vpool = vpool
        storagedriver.save()

        dirs2create.append(storagedriver.mountpoint)
        dirs2create.append('{0}/fd_{1}'.format(mountpoint_writecache, vpool_name))

        file_create_script = """
import os
for directory in {0}:
    if not os.path.exists(directory):
        os.makedirs(directory)
for filename in {1}:
    if not os.path.exists(filename):
        open(filename, 'a').close()
""".format(dirs2create, files2create)
        System.exec_remote_python(client, file_create_script)

        voldrv_config_file = '{0}/voldrv_vpools/{1}.json'.format(System.read_remote_config(client, 'ovs.core.cfgdir'),
                                                                 vpool_name)
        log_file = '/var/log/ovs/volumedriver/{0}.log'.format(vpool_name)
        vd_cmd = '/usr/bin/volumedriver_fs -f --config-file={0} --mountpoint {1} --logrotation --logfile {2} -o big_writes -o sync_read -o allow_other'.format(
            voldrv_config_file, storagedriver.mountpoint, log_file)
        if storagerouter.pmachine.hvtype == 'KVM':
            vd_stopcmd = 'umount {0}'.format(storagedriver.mountpoint)
        else:
            vd_stopcmd = 'exportfs -u *:{0}; umount {0}'.format(storagedriver.mountpoint)
        vd_name = 'volumedriver_{}'.format(vpool_name)

        log_file = '/var/log/ovs/volumedriver/foc_{0}.log'.format(vpool_name)
        fc_cmd = '/usr/bin/failovercachehelper --config-file={0} --logfile={1}'.format(voldrv_config_file, log_file)
        fc_name = 'failovercache_{0}'.format(vpool_name)

        params = {'<VPOOL_MOUNTPOINT>': storagedriver.mountpoint,
                  '<HYPERVISOR_TYPE>': storagerouter.pmachine.hvtype,
                  '<VPOOL_NAME>': vpool_name,
                  '<UUID>': str(uuid.uuid4())}
        if Osdist.is_ubuntu(client):
            if client.file_exists('/opt/OpenvStorage/config/templates/upstart/ovs-volumedriver.conf'):
                client.run('cp -f /opt/OpenvStorage/config/templates/upstart/ovs-volumedriver.conf /opt/OpenvStorage/config/templates/upstart/ovs-volumedriver_{0}.conf'.format(vpool_name))
                client.run('cp -f /opt/OpenvStorage/config/templates/upstart/ovs-failovercache.conf /opt/OpenvStorage/config/templates/upstart/ovs-failovercache_{0}.conf'.format(vpool_name))
        else:
            if client.file_exists('/opt/OpenvStorage/config/templates/systemd/ovs-volumedriver.service'):
                client.run('cp -f /opt/OpenvStorage/config/templates/systemd/ovs-volumedriver.service /opt/OpenvStorage/config/templates/systemd/ovs-volumedriver_{0}.service'.format(vpool_name))
                client.run('cp -f /opt/OpenvStorage/config/templates/systemd/ovs-failovercache.service /opt/OpenvStorage/config/templates/systemd/ovs-failovercache_{0}.service'.format(vpool_name))

        service_script = """
from ovs.plugin.provider.service import Service
Service.add_service(package=('openvstorage', 'volumedriver'), name='{0}', command='{1}', stop_command='{2}', params={5})
Service.add_service(package=('openvstorage', 'failovercache'), name='{3}', command='{4}', stop_command=None, params={5})
""".format(
            vd_name, vd_cmd, vd_stopcmd,
            fc_name, fc_cmd, params
        )
        System.exec_remote_python(client, service_script)

        if storagerouter.pmachine.hvtype == 'VMWARE':
            client.run("grep -q '/tmp localhost(ro,no_subtree_check)' /etc/exports || echo '/tmp localhost(ro,no_subtree_check)' >> /etc/exports")
            if Osdist.is_ubuntu(client):
                client.run('service nfs-kernel-server start')
            else:
                client.run('service nfs start')

        if storagerouter.pmachine.hvtype == 'KVM':
            client.run('virsh pool-define-as {0} dir - - - - {1}'.format(vpool_name, storagedriver.mountpoint))
            client.run('virsh pool-build {0}'.format(vpool_name))
            client.run('virsh pool-start {0}'.format(vpool_name))
            client.run('virsh pool-autostart {0}'.format(vpool_name))

        # Start services
        for node in nodes:
            node_client = SSHClient.load(node)
            for service in services:
                System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
Service.enable_service('{0}')
""".format(service))
                System.exec_remote_python(node_client, """
from ovs.plugin.provider.service import Service
Service.start_service('{0}')
""".format(service))

        # Fill vPool size
        vfs_info = os.statvfs('/mnt/{0}'.format(vpool_name))
        vpool.size = vfs_info.f_blocks * vfs_info.f_bsize
        vpool.save()

        # Configure Cinder
        ovsdb = PersistentFactory.get_client()
        vpool_config_key = str('ovs_openstack_cinder_%s' % storagedriver.vpool_guid)
        if ovsdb.exists(vpool_config_key):
            # Second node gets values saved by first node
            cinder_password, cinder_user, tenant_name, controller_ip, config_cinder = ovsdb.get(vpool_config_key)
        else:
            config_cinder = parameters.get('config_cinder', False)
            cinder_password = ''
            cinder_user = ''
            tenant_name = ''
            controller_ip = ''
        if config_cinder:
            cinder_password = parameters.get('cinder_pass', cinder_password)
            cinder_user = parameters.get('cinder_user', cinder_user)
            tenant_name = parameters.get('cinder_tenant', tenant_name)
            controller_ip = parameters.get('cinder_controller', controller_ip)  # Keystone host
            if cinder_password:
                osc = OpenStackCinder(cinder_password=cinder_password,
                                      cinder_user=cinder_user,
                                      tenant_name=tenant_name,
                                      controller_ip=controller_ip)

                osc.configure_vpool(vpool_name, storagedriver.mountpoint)
                # Save values for first node to use
                ovsdb.set(vpool_config_key,
                          [cinder_password, cinder_user, tenant_name, controller_ip, config_cinder])
Exemplo n.º 22
0
    @staticmethod
    def migrate():
        """
        Executes async migrations. It doesn't matter too much when they are executed, as long as they eventually get
        executed. This code will typically contain:
        * "Dangerous" migration code (code that needs certain services to be running)
        * Migration code depending on a cluster-wide state
        * ...
        * Successfully finishing a piece of migration code should create an entry under /ovs/framework/migration so it is not executed again
        *     E.g. /ovs/framework/migration|stats_monkey_integration: True
        """
        MigrationController._logger.info('Preparing out of band migrations...')

        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.storagedriverlist import StorageDriverList
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.vpoollist import VPoolList
        from ovs.extensions.db.arakooninstaller import ArakoonInstaller
        from ovs.extensions.generic.configuration import Configuration
        from ovs.extensions.generic.sshclient import SSHClient
        from ovs_extensions.generic.toolbox import ExtensionsToolbox
        from ovs.extensions.migration.migration.ovsmigrator import ExtensionMigrator
        from ovs.extensions.packages.packagefactory import PackageFactory
        from ovs_extensions.services.interfaces.systemd import Systemd
        from ovs.extensions.services.servicefactory import ServiceFactory
        from ovs.extensions.storageserver.storagedriver import StorageDriverConfiguration
        from ovs.lib.helpers.storagedriver.installer import StorageDriverInstaller

        MigrationController._logger.info('Start out of band migrations...')
        service_manager = ServiceFactory.get_manager()

        sr_client_map = {}
        for storagerouter in StorageRouterList.get_storagerouters():
            sr_client_map[storagerouter.guid] = SSHClient(endpoint=storagerouter.ip,  # Is triggered during post-update code too during which the ovs-watcher-framework service is still down and thus not refreshing the heartbeat --> use IP i/o StorageRouter
                                                          username='******')

        #########################################################
        # Addition of 'ExecReload' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if 'ExecReload=' in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue

                    try:
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        ##################################################################
        # Adjustment of open file descriptors for Arakoon services to 8192
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-arakoon-'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'LimitNOFILE=8192'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'limit nofile 8192 8192'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='arakoon',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        #############################
        # Migrate to multiple proxies
        for storagedriver in StorageDriverList.get_storagedrivers():
            vpool = storagedriver.vpool
            root_client = sr_client_map[storagedriver.storagerouter_guid]
            for alba_proxy in storagedriver.alba_proxies:
                # Rename alba_proxy service in model
                service = alba_proxy.service
                old_service_name = 'albaproxy_{0}'.format(vpool.name)
                new_service_name = 'albaproxy_{0}_0'.format(vpool.name)
                if old_service_name != service.name:
                    continue
                service.name = new_service_name
                service.save()

                if not service_manager.has_service(name=old_service_name, client=root_client):
                    continue
                old_configuration_key = '/ovs/framework/hosts/{0}/services/{1}'.format(storagedriver.storagerouter.machine_id, old_service_name)
                if not Configuration.exists(key=old_configuration_key):
                    continue

                # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='alba',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, old_service_name),
                                                    new_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, new_service_name))

                # Register new service and remove old service
                service_manager.add_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                            client=root_client,
                                            params=Configuration.get(old_configuration_key),
                                            target_name='ovs-{0}'.format(new_service_name))

                # Update scrub proxy config
                proxy_config_key = '/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)
                proxy_config = None if Configuration.exists(key=proxy_config_key) is False else Configuration.get(proxy_config_key)
                if proxy_config is not None:
                    fragment_cache = proxy_config.get(StorageDriverConfiguration.CACHE_FRAGMENT, ['none', {}])
                    if fragment_cache[0] == 'alba' and fragment_cache[1].get('cache_on_write') is True:  # Accelerated ALBA configured
                        fragment_cache_scrub_info = copy.deepcopy(fragment_cache)
                        fragment_cache_scrub_info[1]['cache_on_read'] = False
                        proxy_scrub_config_key = '/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid)
                        proxy_scrub_config = None if Configuration.exists(key=proxy_scrub_config_key) is False else Configuration.get(proxy_scrub_config_key)
                        if proxy_scrub_config is not None and proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] == ['none']:
                            proxy_scrub_config[StorageDriverConfiguration.CACHE_FRAGMENT] = fragment_cache_scrub_info
                            Configuration.set(key=proxy_scrub_config_key, value=proxy_scrub_config)

            # Update 'backend_connection_manager' section
            changes = False
            storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
            if 'backend_connection_manager' not in storagedriver_config.configuration:
                continue

            current_config = storagedriver_config.configuration['backend_connection_manager']
            if current_config.get('backend_type') != 'MULTI':
                changes = True
                backend_connection_manager = {'backend_type': 'MULTI'}
                for index, proxy in enumerate(sorted(storagedriver.alba_proxies, key=lambda pr: pr.service.ports[0])):
                    backend_connection_manager[str(index)] = copy.deepcopy(current_config)
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_use_rora'] = True
                    # noinspection PyUnresolvedReferences
                    backend_connection_manager[str(index)]['alba_connection_rora_manifest_cache_capacity'] = 5000
                    # noinspection PyUnresolvedReferences
                    for key, value in backend_connection_manager[str(index)].items():
                        if key.startswith('backend_interface'):
                            backend_connection_manager[key] = value
                            # noinspection PyUnresolvedReferences
                            del backend_connection_manager[str(index)][key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        backend_connection_manager[key] = value
            else:
                backend_connection_manager = current_config
                for value in backend_connection_manager.values():
                    if isinstance(value, dict):
                        for key, val in value.items():
                            if key.startswith('backend_interface'):
                                backend_connection_manager[key] = val
                                changes = True
                                del value[key]
                for key, value in {'backend_interface_retries_on_error': 5,
                                   'backend_interface_retry_interval_secs': 1,
                                   'backend_interface_retry_backoff_multiplier': 2.0}.iteritems():
                    if key not in backend_connection_manager:
                        changes = True
                        backend_connection_manager[key] = value

            if changes is True:
                storagedriver_config.clear_backend_connection_manager()
                storagedriver_config.configure_backend_connection_manager(**backend_connection_manager)
                storagedriver_config.save(root_client)

                # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                ExtensionsToolbox.edit_version_file(client=root_client,
                                                    package_name='volumedriver',
                                                    old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
                if service_manager.__class__ == Systemd:
                    root_client.run(['systemctl', 'daemon-reload'])

        ########################################
        # Update metadata_store_bits information
        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            bits = None
            for storagedriver in vpool.storagedrivers:
                key = '/ovs/framework/hosts/{0}/services/volumedriver_{1}'.format(storagedriver.storagerouter.machine_id, vpool.name)
                if Configuration.exists(key=key) and 'METADATASTORE_BITS' not in Configuration.get(key=key):
                    if bits is None:
                        entries = service_manager.extract_from_service_file(name='ovs-volumedriver_{0}'.format(vpool.name),
                                                                            client=sr_client_map[storagedriver.storagerouter_guid],
                                                                            entries=['METADATASTORE_BITS='])
                        if len(entries) == 1:
                            bits = entries[0].split('=')[-1]
                            bits = int(bits) if bits.isdigit() else 5
                    if bits is not None:
                        try:
                            content = Configuration.get(key=key)
                            content['METADATASTORE_BITS'] = bits
                            Configuration.set(key=key, value=content)
                        except:
                            MigrationController._logger.exception('Error updating volumedriver info for vPool {0} on StorageRouter {1}'.format(vpool.name, storagedriver.storagerouter.name))

            if bits is not None:
                vpool.metadata_store_bits = bits
                vpool.save()

        #####################################
        # Update the vPool metadata structure
        def _update_metadata_structure(metadata):
            metadata = copy.deepcopy(metadata)
            cache_structure = {'read': False,
                               'write': False,
                               'is_backend': False,
                               'quota': None,
                               'backend_info': {'name': None,  # Will be filled in when is_backend is true
                                                'backend_guid': None,
                                                'alba_backend_guid': None,
                                                'policies': None,
                                                'preset': None,
                                                'arakoon_config': None,
                                                'connection_info': {'client_id': None,
                                                                    'client_secret': None,
                                                                    'host': None,
                                                                    'port': None,
                                                                    'local': None}}
                               }
            structure_map = {StorageDriverConfiguration.CACHE_BLOCK: {'read': 'block_cache_on_read',
                                                                      'write': 'block_cache_on_write',
                                                                      'quota': 'quota_bc',
                                                                      'backend_prefix': 'backend_bc_{0}'},
                             StorageDriverConfiguration.CACHE_FRAGMENT: {'read': 'fragment_cache_on_read',
                                                                         'write': 'fragment_cache_on_write',
                                                                         'quota': 'quota_fc',
                                                                         'backend_prefix': 'backend_aa_{0}'}}
            if 'arakoon_config' in metadata['backend']:  # Arakoon config should be placed under the backend info
                metadata['backend']['backend_info']['arakoon_config'] = metadata['backend'].pop('arakoon_config')
            if 'connection_info' in metadata['backend']:  # Connection info should be placed under the backend info
                metadata['backend']['backend_info']['connection_info'] = metadata['backend'].pop('connection_info')
            if 'caching_info' not in metadata:  # Caching info is the new key
                would_be_caching_info = {}
                metadata['caching_info'] = would_be_caching_info
                # Extract all caching data for every storagerouter
                current_caching_info = metadata['backend'].pop('caching_info')  # Pop to mutate metadata
                for storagerouter_guid in current_caching_info.iterkeys():
                    current_cache_data = current_caching_info[storagerouter_guid]
                    storagerouter_caching_info = {}
                    would_be_caching_info[storagerouter_guid] = storagerouter_caching_info
                    for cache_type, cache_type_mapping in structure_map.iteritems():
                        new_cache_structure = copy.deepcopy(cache_structure)
                        storagerouter_caching_info[cache_type] = new_cache_structure
                        for new_structure_key, old_structure_key in cache_type_mapping.iteritems():
                            if new_structure_key == 'backend_prefix':
                                # Get possible backend related info
                                metadata_key = old_structure_key.format(storagerouter_guid)
                                if metadata_key not in metadata:
                                    continue
                                backend_data = metadata.pop(metadata_key)  # Pop to mutate metadata
                                new_cache_structure['is_backend'] = True
                                # Copy over the old data
                                new_cache_structure['backend_info']['arakoon_config'] = backend_data['arakoon_config']
                                new_cache_structure['backend_info'].update(backend_data['backend_info'])
                                new_cache_structure['backend_info']['connection_info'].update(backend_data['connection_info'])
                            else:
                                new_cache_structure[new_structure_key] = current_cache_data.get(old_structure_key)
            return metadata
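        # Sketch of the transformation above (hypothetical guids and values): an
        # old-style entry such as
        #   {'backend': {'arakoon_config': {...}, 'connection_info': {...},
        #                'caching_info': {'<sr_guid>': {'fragment_cache_on_read': True, ...}}},
        #    'backend_aa_<sr_guid>': {...}}
        # becomes
        #   {'backend': {'backend_info': {'arakoon_config': {...}, 'connection_info': {...}}},
        #    'caching_info': {'<sr_guid>': {<CACHE_FRAGMENT>: {'read': True, ...,
        #                                                      'backend_info': {...}}, ...}}}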

        vpools = VPoolList.get_vpools()
        for vpool in vpools:
            try:
                new_metadata = _update_metadata_structure(vpool.metadata)
                vpool.metadata = new_metadata
                vpool.save()
            except KeyError:
                MigrationController._logger.exception('Exceptions occurred when updating the metadata for vPool {0}'.format(vpool.name))

        ##############################################
        # Always use indent=4 during Configuration set
        def _resave_all_config_entries(config_path='/ovs'):
            """
            Recursive function which checks whether each config management key is a directory or a leaf.
            For leaves, the config is retrieved and saved again so it picks up the new indentation logic
            """
            for item in Configuration.list(config_path):
                new_path = config_path + '/' + item
                print new_path
                if Configuration.dir_exists(new_path) is True:
                    _resave_all_config_entries(config_path=new_path)
                else:
                    try:
                        _config = Configuration.get(new_path)
                        Configuration.set(new_path, _config)
                    except:
                        _config = Configuration.get(new_path, raw=True)
                        Configuration.set(new_path, _config, raw=True)
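        # Illustrative walk: _resave_all_config_entries('/ovs') recurses into
        # directories such as /ovs/framework and /ovs/vpools and re-saves leaf keys
        # like /ovs/framework/cluster_id, falling back to raw mode for values that
        # are not valid JSON.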
        if ExtensionMigrator.THIS_VERSION <= 13:  # There is no way of checking whether this new indentation logic has been applied, so we only perform this for version 13 and lower
            MigrationController._logger.info('Re-saving every configuration setting with new indentation rules')
            _resave_all_config_entries()

        ############################
        # Update some default values
        def _update_manifest_cache_size(_proxy_config_key):
            updated = False
            manifest_cache_size = 500 * 1024 * 1024
            if Configuration.exists(key=_proxy_config_key):
                _proxy_config = Configuration.get(key=_proxy_config_key)
                for cache_type in [StorageDriverConfiguration.CACHE_BLOCK, StorageDriverConfiguration.CACHE_FRAGMENT]:
                    if cache_type in _proxy_config and _proxy_config[cache_type][0] == 'alba':
                        if _proxy_config[cache_type][1]['manifest_cache_size'] != manifest_cache_size:
                            updated = True
                            _proxy_config[cache_type][1]['manifest_cache_size'] = manifest_cache_size
                if _proxy_config.get('manifest_cache_size') != manifest_cache_size:
                    updated = True
                    _proxy_config['manifest_cache_size'] = manifest_cache_size

                if updated is True:
                    Configuration.set(key=_proxy_config_key, value=_proxy_config)
            return updated
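        # Illustrative usage: a proxy config whose manifest_cache_size is, say,
        # 104857600 (100 MiB) gets bumped to 524288000 (500 MiB) both at top level
        # and inside any 'alba' block/fragment cache entry; the True return value
        # tells the caller the proxy service needs the '-reboot' version-file edit.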

        for storagedriver in StorageDriverList.get_storagedrivers():
            try:
                vpool = storagedriver.vpool
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                _update_manifest_cache_size('/ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))  # Generic scrub proxy is deployed every time scrubbing kicks in, so no need to restart these services
                for alba_proxy in storagedriver.alba_proxies:
                    if _update_manifest_cache_size('/ovs/vpools/{0}/proxies/{1}/config/main'.format(vpool.guid, alba_proxy.guid)) is True:
                        # Add '-reboot' to alba_proxy services (because of newly created services and removal of old service)
                        ExtensionsToolbox.edit_version_file(client=root_client,
                                                            package_name='alba',
                                                            old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, alba_proxy.service.name))

                # Update 'backend_connection_manager' section
                changes = False
                storagedriver_config = StorageDriverConfiguration(vpool.guid, storagedriver.storagedriver_id)
                if 'backend_connection_manager' not in storagedriver_config.configuration:
                    continue

                current_config = storagedriver_config.configuration['backend_connection_manager']
                for key, value in current_config.iteritems():
                    if key.isdigit() is True:
                        if value.get('alba_connection_asd_connection_pool_capacity') != 10:
                            changes = True
                            value['alba_connection_asd_connection_pool_capacity'] = 10
                        if value.get('alba_connection_timeout') != 30:
                            changes = True
                            value['alba_connection_timeout'] = 30
                        if value.get('alba_connection_rora_manifest_cache_capacity') != 25000:
                            changes = True
                            value['alba_connection_rora_manifest_cache_capacity'] = 25000

                if changes is True:
                    storagedriver_config.clear_backend_connection_manager()
                    storagedriver_config.configure_backend_connection_manager(**current_config)
                    storagedriver_config.save(root_client)

                    # Add '-reboot' to volumedriver services (because of updated 'backend_connection_manager' section)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='volumedriver',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, 'volumedriver_{0}'.format(vpool.name)))
            except Exception:
                MigrationController._logger.exception('Updating default configuration values failed for StorageDriver {0}'.format(storagedriver.storagedriver_id))

        ####################################################
        # Adding proxy fail fast as env variable for proxies
        changed_clients = set()
        for storagerouter in StorageRouterList.get_storagerouters():
            root_client = sr_client_map[storagerouter.guid]
            for service_name in service_manager.list_services(client=root_client):
                if not service_name.startswith('ovs-albaproxy_'):
                    continue

                if ServiceFactory.get_service_type() == 'systemd':
                    path = '/lib/systemd/system/{0}.service'.format(service_name)
                    check = 'Environment=ALBA_FAIL_FAST=true'
                else:
                    path = '/etc/init/{0}.conf'.format(service_name)
                    check = 'env ALBA_FAIL_FAST=true'

                if not root_client.file_exists(path):
                    continue
                if check in root_client.file_read(path):
                    continue

                try:
                    service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY, client=root_client, target_name=service_name)
                    changed_clients.add(root_client)
                    ExtensionsToolbox.edit_version_file(client=root_client,
                                                        package_name='alba',
                                                        old_run_file='{0}/{1}.version'.format(ServiceFactory.RUN_FILE_DIR, service_name))
                except:
                    MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
        for root_client in changed_clients:
            root_client.run(['systemctl', 'daemon-reload'])

        ######################################
        # Integration of stats monkey (2.10.2)
        if Configuration.get(key='/ovs/framework/migration|stats_monkey_integration', default=False) is False:
            try:
                # Get content of old key into new key
                old_stats_monkey_key = '/statsmonkey/statsmonkey'
                if Configuration.exists(key=old_stats_monkey_key) is True:
                    Configuration.set(key='/ovs/framework/monitoring/stats_monkey', value=Configuration.get(key=old_stats_monkey_key))
                    Configuration.delete(key=old_stats_monkey_key)

                # Make sure to disable the stats monkey by default or take over the current schedule if it was configured manually before
                celery_key = '/ovs/framework/scheduling/celery'
                current_value = None
                scheduling_config = Configuration.get(key=celery_key, default={})
                if 'statsmonkey.run_all_stats' in scheduling_config:  # Old celery task name of the stats monkey
                    current_value = scheduling_config.pop('statsmonkey.run_all_stats')
                scheduling_config['ovs.stats_monkey.run_all'] = current_value
                scheduling_config['alba.stats_monkey.run_all'] = current_value
                Configuration.set(key=celery_key, value=scheduling_config)

                support_key = '/ovs/framework/support'
                support_config = Configuration.get(key=support_key)
                support_config['support_agent'] = support_config.pop('enabled', True)
                support_config['remote_access'] = support_config.pop('enablesupport', False)
                Configuration.set(key=support_key, value=support_config)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|stats_monkey_integration', value=True)
            except Exception:
                MigrationController._logger.exception('Integration of stats monkey failed')

        ######################################################
        # Write away cluster ID to a file for back-up purposes
        try:
            cluster_id = Configuration.get(key='/ovs/framework/cluster_id', default=None)
            with open(Configuration.CONFIG_STORE_LOCATION, 'r') as config_file:
                config = json.load(config_file)
            if cluster_id is not None and config.get('cluster_id', None) is None:
                config['cluster_id'] = cluster_id
                with open(Configuration.CONFIG_STORE_LOCATION, 'w') as config_file:
                    json.dump(config, config_file, indent=4)
        except Exception:
            MigrationController._logger.exception('Writing cluster id to a file failed.')

        #########################################################
        # Additional string formatting in Arakoon services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|arakoon_service_update', default=False) is False:
                arakoon_service_names = [ArakoonInstaller.get_service_name_for_cluster(cluster_name=cluster_name) for cluster_name in Configuration.list(key='ovs/arakoon')]
                for storagerouter in StorageRouterList.get_masters():
                    for service_name in arakoon_service_names:
                        config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagerouter.machine_id, service_name)
                        if Configuration.exists(key=config_key):
                            config = Configuration.get(key=config_key)
                            config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                            config['ARAKOON_PKG_NAME'] = PackageFactory.PKG_ARAKOON
                            config['ARAKOON_VERSION_CMD'] = PackageFactory.VERSION_CMD_ARAKOON
                            Configuration.set(key=config_key, value=config)
                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|arakoon_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the Arakoon services failed')

        ############################################################
        # Additional string formatting in ALBA proxy services (2.11)
        changed_clients = set()
        try:
            if Configuration.get(key='/ovs/framework/migration|alba_proxy_service_update', default=False) is False:
                alba_pkg_name, alba_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_ALBA)
                for service in ServiceTypeList.get_by_name('AlbaProxy').services:
                    root_client = sr_client_map[service.storagerouter_guid]
                    config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(service.storagerouter.machine_id, service.name)
                    if Configuration.exists(key=config_key):
                        config = Configuration.get(key=config_key)
                        config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                        config['ALBA_PKG_NAME'] = alba_pkg_name
                        config['ALBA_VERSION_CMD'] = alba_version_cmd
                        Configuration.set(key=config_key, value=config)
                        service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_PROXY,
                                                           client=root_client,
                                                           target_name='ovs-{0}'.format(service.name))
                        changed_clients.add(root_client)

                # Make sure once this finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|alba_proxy_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the ALBA proxy services failed')

        ############################################################
        # Additional string formatting in DTL/VOLDRV services (2.11)
        try:
            if Configuration.get(key='/ovs/framework/migration|voldrv_service_update', default=False) is False:
                sd_pkg_name, sd_version_cmd = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for vpool in VPoolList.get_vpools():
                    for storagedriver in vpool.storagedrivers:
                        root_client = sr_client_map[storagedriver.storagerouter_guid]
                        for entry in ['dtl', 'volumedriver']:
                            service_name = '{0}_{1}'.format(entry, vpool.name)
                            service_template = StorageDriverInstaller.SERVICE_TEMPLATE_DTL if entry == 'dtl' else StorageDriverInstaller.SERVICE_TEMPLATE_SD
                            config_key = ServiceFactory.SERVICE_CONFIG_KEY.format(storagedriver.storagerouter.machine_id, service_name)
                            if Configuration.exists(key=config_key):
                                config = Configuration.get(key=config_key)
                                config['RUN_FILE_DIR'] = ServiceFactory.RUN_FILE_DIR
                                config['VOLDRV_PKG_NAME'] = sd_pkg_name
                                config['VOLDRV_VERSION_CMD'] = sd_version_cmd
                                Configuration.set(key=config_key, value=config)
                                service_manager.regenerate_service(name=service_template,
                                                                   client=root_client,
                                                                   target_name='ovs-{0}'.format(service_name))
                                changed_clients.add(root_client)

                # Make sure that once this has finished, it never runs again by setting this key to True
                Configuration.set(key='/ovs/framework/migration|voldrv_service_update', value=True)
        except Exception:
            MigrationController._logger.exception('Updating the string formatting for the DTL/VOLDRV services failed')

        #######################################################
        # Storing actual package name in version files (2.11.0) (https://github.com/openvstorage/framework/issues/1876)
        if Configuration.get(key='/ovs/framework/migration|actual_package_name_in_version_file', default=False) is False:
            try:
                voldrv_pkg_name, _ = PackageFactory.get_package_and_version_cmd_for(component=PackageFactory.COMP_SD)
                for storagerouter in StorageRouterList.get_storagerouters():
                    root_client = sr_client_map.get(storagerouter.guid)
                    if root_client is None:
                        continue

                    for file_name in root_client.file_list(directory=ServiceFactory.RUN_FILE_DIR):
                        if not file_name.endswith('.version'):
                            continue
                        file_path = '{0}/{1}'.format(ServiceFactory.RUN_FILE_DIR, file_name)
                        contents = root_client.file_read(filename=file_path)
                        regenerate = False
                        if voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER:
                            if 'volumedriver-server' in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER)
                                root_client.file_write(filename=file_path, contents=contents)
                        elif voldrv_pkg_name == PackageFactory.PKG_VOLDRV_SERVER_EE:
                            if 'volumedriver-server' in contents or PackageFactory.PKG_VOLDRV_SERVER in contents:
                                regenerate = True
                                contents = contents.replace('volumedriver-server', PackageFactory.PKG_VOLDRV_SERVER_EE)
                                contents = contents.replace(PackageFactory.PKG_VOLDRV_SERVER, PackageFactory.PKG_VOLDRV_SERVER_EE)
                                root_client.file_write(filename=file_path, contents=contents)

                        if regenerate is True:
                            service_manager.regenerate_service(name=StorageDriverInstaller.SERVICE_TEMPLATE_DTL if file_name.startswith('dtl') else StorageDriverInstaller.SERVICE_TEMPLATE_SD,
                                                               client=root_client,
                                                               target_name='ovs-{0}'.format(file_name.split('.')[0]))  # Leave out .version
                            changed_clients.add(root_client)
                Configuration.set(key='/ovs/framework/migration|actual_package_name_in_version_file', value=True)
            except Exception:
                MigrationController._logger.exception('Updating actual package name for version files failed')

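        # The unit files regenerated above only take effect after systemd reloads its configuration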
        for root_client in changed_clients:
            try:
                root_client.run(['systemctl', 'daemon-reload'])
            except Exception:
                MigrationController._logger.exception('Executing command "systemctl daemon-reload" failed')

        #########################################################
        # Addition of Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for AlbaProxy SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagedriver in StorageDriverList.get_storagedrivers():
                root_client = sr_client_map[storagedriver.storagerouter_guid]
                for alba_proxy in storagedriver.alba_proxies:
                    service = alba_proxy.service
                    service_name = 'ovs-{0}'.format(service.name)
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-albaproxy', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except Exception:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])
        #########################################################
        # Addition of Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50' for Arakoon SystemD services
        if ServiceFactory.get_service_type() == 'systemd':
            changed_clients = set()
            for storagerouter in StorageRouterList.get_storagerouters():
                root_client = sr_client_map[storagerouter.guid]
                for service_name in service_manager.list_services(client=root_client):
                    if not service_name.startswith('ovs-arakoon-'):
                        continue
                    if not service_manager.has_service(name=service_name, client=root_client):
                        continue
                    if "Environment=OCAMLRUNPARAM='b,a=1,s=4096k,O=50" in root_client.file_read(filename='/lib/systemd/system/{0}.service'.format(service_name)):
                        continue
                    try:
                        service_manager.regenerate_service(name='ovs-arakoon', client=root_client, target_name=service_name)
                        changed_clients.add(root_client)
                    except Exception:
                        MigrationController._logger.exception('Error rebuilding service {0}'.format(service_name))
            for root_client in changed_clients:
                root_client.run(['systemctl', 'daemon-reload'])

        MigrationController._logger.info('Finished out of band migrations')
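
Every migration step above follows the same run-once pattern: a boolean flag under /ovs/framework/migration guards the work, and the flag is only flipped to True after the step completed without raising, so a failed step is retried on the next migration run. A minimal sketch of that pattern, assuming the framework's Configuration class used in the listings above; the helper name run_once_migration and its arguments are hypothetical:

from ovs.extensions.generic.configuration import Configuration  # Assumed import path; the listings above use Configuration directly

def run_once_migration(flag_key, work, logger):
    """
    Execute 'work' at most once, guarded by a Configuration flag.
    :param flag_key: Migration flag, e.g. '/ovs/framework/migration|arakoon_service_update'
    :param work: Callable performing the actual migration logic
    :param logger: Logger used to report failures
    """
    try:
        if Configuration.get(key=flag_key, default=False) is False:
            work()
            # Once the work finished without raising, make sure it never runs again
            Configuration.set(key=flag_key, value=True)
    except Exception:
        logger.exception('Migration step guarded by {0} failed'.format(flag_key))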
Example no. 23
0
    def gather_scrub_work():
        """
        Retrieve and execute scrub work
        :return: None
        """
        ScheduledTaskController._logger.info('Gather Scrub - Started')

        scrub_locations = {}
        for storage_driver in StorageDriverList.get_storagedrivers():
            for partition in storage_driver.partitions:
                if DiskPartition.ROLES.SCRUB == partition.role:
                    ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} has SCRUB partition at {1}'.format(storage_driver.storagerouter.ip, partition.path))
                    if storage_driver.storagerouter not in scrub_locations:
                        try:
                            sshclient = SSHClient(storage_driver.storagerouter)
                            # Use ServiceManager(sshclient) to make sure ovs-workers are actually running
                            if ServiceManager.get_service_status('workers', sshclient) is False:
                                ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} - workers are not running'.format(storage_driver.storagerouter.ip))
                            else:
                                scrub_locations[storage_driver.storagerouter] = str(partition.path)
                        except UnableToConnectException:
                            ScheduledTaskController._logger.warning('Gather Scrub - Storage Router {0:<15} is not reachable'.format(storage_driver.storagerouter.ip))

        if len(scrub_locations) == 0:
            raise RuntimeError('No scrub locations found')

        vdisk_guids = set()
        for vmachine in VMachineList.get_customer_vmachines():
            for vdisk in vmachine.vdisks:
                if vdisk.info['object_type'] == 'BASE':
                    vdisk_guids.add(vdisk.guid)
        for vdisk in VDiskList.get_without_vmachine():
            if vdisk.info['object_type'] == 'BASE':
                vdisk_guids.add(vdisk.guid)

        if len(vdisk_guids) == 0:
            ScheduledTaskController._logger.info('Gather Scrub - No scrub work needed')
            return

        ScheduledTaskController._logger.info('Gather Scrub - Checking {0} volumes for scrub work'.format(len(vdisk_guids)))
        local_machineid = System.get_my_machine_id()
        local_storage_router = None
        local_scrub_location = None
        local_vdisks_to_scrub = []
        result_set = {}
        storage_router_list = []
        scrub_map = {}

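        # Distribute the vdisk guids over the scrub locations: each location gets a contiguous slice of the guid list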
        for index, scrub_info in enumerate(scrub_locations.items()):
            start_index = index * len(vdisk_guids) / len(scrub_locations)
            end_index = (index + 1) * len(vdisk_guids) / len(scrub_locations)
            storage_router = scrub_info[0]
            vdisk_guids_to_scrub = list(vdisk_guids)[start_index:end_index]
            local = storage_router.machine_id == local_machineid
            ScheduledTaskController._logger.info('Gather Scrub - Storage Router {0:<15} ({1}) - Scrubbing {2} virtual disks'.format(storage_router.ip, 'local' if local is True else 'remote', len(vdisk_guids_to_scrub)))

            if local is True:
                local_storage_router = storage_router
                local_scrub_location = scrub_info[1]
                local_vdisks_to_scrub = vdisk_guids_to_scrub
            else:
                result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_info[1],
                                                                                              vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                storage_router_list.append(storage_router)
                scrub_map[storage_router.ip] = vdisk_guids_to_scrub

        # Remote tasks have been launched, now start the local task and then wait for remote tasks to finish
        processed_guids = []
        if local_scrub_location is not None and len(local_vdisks_to_scrub) > 0:
            try:
                processed_guids = ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                              vdisk_guids=local_vdisks_to_scrub)
            except Exception as ex:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(local_storage_router.ip, ex))

        all_results, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60)  # Check every 60 seconds if tasks are still running

        for ip, result in all_results.iteritems():
            if isinstance(result, list):
                processed_guids.extend(result)
            else:
                ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

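        # Try to reschedule the work of each failed node on a healthy remote node, falling back to the local node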
        result_set = {}
        for failed_node in failed_nodes:
            ScheduledTaskController._logger.warning('Scrubbing failed on node {0}. Will reschedule on another node.'.format(failed_node))
            vdisk_guids_to_scrub = scrub_map[failed_node]
            rescheduled_work = False
            for storage_router, scrub_location in scrub_locations.items():
                if storage_router.ip not in failed_nodes:
                    if storage_router.machine_id != local_machineid:
                        ScheduledTaskController._logger.info('Rescheduled scrub work from node {0} to node {1}.'.format(failed_node, storage_router.ip))
                        result_set[storage_router.ip] = ScheduledTaskController._execute_scrub_work.s(scrub_location=scrub_location,
                                                                                                      vdisk_guids=vdisk_guids_to_scrub).apply_async(routing_key='sr.{0}'.format(storage_router.machine_id))
                        storage_router_list.append(storage_router)
                        rescheduled_work = True
                        break
            if rescheduled_work is False:
                if local_scrub_location is not None:
                    try:
                        processed_guids.extend(ScheduledTaskController._execute_scrub_work(scrub_location=local_scrub_location,
                                                                                           vdisk_guids=vdisk_guids_to_scrub))
                    except Exception as ex:
                        ScheduledTaskController._logger.error('Gather Scrub - Storage Router Local - Scrubbing failed with error:\n - {0}'.format(ex))
                else:
                    ScheduledTaskController._logger.warning('No nodes left to reschedule work from node {0}'.format(failed_node))

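        # Wait for the rescheduled tasks as well and collect their results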
        if len(result_set) > 0:
            all_results2, failed_nodes = CeleryToolbox.manage_running_tasks(result_set, timesleep=60)  # Check every 60 seconds if tasks are still running

            for ip, result in all_results2.iteritems():
                if isinstance(result, list):
                    processed_guids.extend(result)
                else:
                    ScheduledTaskController._logger.error('Gather Scrub - Storage Router {0:<15} - Scrubbing failed with error:\n - {1}'.format(ip, result))

        if len(set(processed_guids)) != len(vdisk_guids) or set(processed_guids).difference(vdisk_guids):
            raise RuntimeError('Scrubbing failed for 1 or more storagerouters')
        ScheduledTaskController._logger.info('Gather Scrub - Finished')
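
The slicing in the distribution loop above relies on Python 2 integer division to give each scrub location a contiguous, non-overlapping slice of the guid list, with the slices together covering every guid exactly once. A minimal standalone sketch of that arithmetic (names are illustrative; use // instead of / on Python 3):

def split_work(items, bucket_count):
    # Hand out contiguous slices; the rounding makes later buckets absorb the remainder
    buckets = []
    for index in range(bucket_count):
        start = index * len(items) / bucket_count      # Integer division on Python 2
        end = (index + 1) * len(items) / bucket_count
        buckets.append(items[start:end])
    return buckets

# 7 guids over 3 scrub locations -> slice sizes 2, 2 and 3; nothing is lost or duplicated
print(split_work(['g1', 'g2', 'g3', 'g4', 'g5', 'g6', 'g7'], 3))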