def get_services():
        """
        Retrieve every service known to the model

        :return: Whatever ServiceList.get_services() returns, unmodified
        """
        all_services = ServiceList.get_services()
        return all_services
Пример #2
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Collects the cluster names of all internal Arakoon, namespace manager and ALBA manager services, loads each
        cluster's configuration from Etcd and requests a collapse on every node listed in that configuration.
        An error on a single node is logged and does not abort the remaining nodes or clusters.
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        arakoon_clusters = []
        for service in ServiceList.get_services():
            if service.is_internal is True and \
               service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                # Several services can map to the same cluster name. Every node of a cluster is collapsed below
                # anyway, so each cluster only needs to be handled once.
                if cluster not in arakoon_clusters:
                    arakoon_clusters.append(cluster)

        for cluster in arakoon_clusters:
            ScheduledTaskController._logger.info('  Collapsing cluster {0}'.format(cluster))
            contents = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
            parser = RawConfigParser()
            parser.readfp(StringIO(contents))
            # node name -> ([ip], client port), as read from the cluster's ini-style configuration
            nodes = {}
            for node in parser.get('global', 'cluster').split(','):
                node = node.strip()
                nodes[node] = ([str(parser.get(node, 'ip'))], int(parser.get(node, 'client_port')))
            config = ArakoonClientConfig(str(cluster), nodes)
            for node in nodes:
                ScheduledTaskController._logger.info('    Collapsing node: {0}'.format(node))
                client = ArakoonAdmin(config)
                try:
                    # NOTE(review): 2 is presumably the number of tlogs to keep - confirm against the client API
                    client.collapse(str(node), 2)
                except Exception:
                    # Best effort per node, but let KeyboardInterrupt/SystemExit propagate
                    ScheduledTaskController._logger.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node))

        ScheduledTaskController._logger.info('Arakoon collapse finished')
    def checkRequiredPorts(self):
        """
        Check the port connections of the services expected on this node

        Hands to ``self._isPortListening`` the ports of every model service linked to this machine and the ports
        listed in ``self.req_side_ports``. On MASTER nodes it additionally runs a ``celery inspect ping`` and
        reports whether a 'pong' was received. All results are reported through ``self.utility.logger``.
        :return: None
        """
        self.utility.logger("Checking PORT CONNECTIONS of several services ...", self.module, 3,
                            'checkRequiredPorts', False)

        # check ports for OVS services
        self.utility.logger("Checking OVS services ...", self.module, 3, 'checkOvsServicesPorts', False)
        for sr in ServiceList.get_services():
            if sr.storagerouter_guid == self.machine_details.guid:
                for port in sr.ports:
                    self._isPortListening(sr.name, port)

        # check NGINX and memcached
        self.utility.logger("Checking NGINX and Memcached ...", self.module, 3, 'checkNginxAndMemcached', False)

        for process, ports in self.req_side_ports.iteritems():
            for port in ports:
                self._isPortListening(process, port)

        # Check Celery and RabbitMQ
        self.utility.logger("Checking RabbitMQ/Celery ...", self.module, 3, 'checkRabbitmqCelery', False)

        if self.utility.node_type == "MASTER":
            # NOTE(review): the broker credentials are hard-coded in this command; consider reading them from
            # configuration instead
            ping_command = "celery inspect ping -b amqp://ovs:0penv5tor4ge@{0}//".format(self.machine_details.ip)
            # The command is fully formatted at this point. The former extra ``.format(process)`` changed nothing
            # but raised a NameError when ``req_side_ports`` was empty (``process`` leaked from the loop above).
            pcel = self.utility.executeBashCommand(ping_command)
            # The second output line is inspected, so at least two lines are required (an empty result used to
            # raise an IndexError here)
            if len(pcel) > 1 and 'pong' in pcel[1].strip():
                self.utility.logger("Connection successfully established!", self.module, 1, 'port_celery')
            else:
                self.utility.logger("Connection FAILED to service Celery, please check 'RabbitMQ' and 'ovs-workers'?",
                                     self.module, 0, 'port_celery')
        else:
            self.utility.logger("RabbitMQ is not running/active on this server!", self.module, 5, 'port_celery')
Пример #4
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Collects the cluster names of all services of type 'Arakoon', 'NamespaceManager' or 'AlbaManager', loads each
        cluster's configuration from Etcd and requests a tlog collapse on every node listed in that configuration.
        An error on a single node is logged and does not abort the remaining nodes or clusters.
        :return: None
        """
        logger.info('Starting arakoon collapse')
        # Only the cluster names are used; a set also drops duplicates caused by several services per cluster
        arakoon_clusters = set()
        for service in ServiceList.get_services():
            if service.type.name in ('Arakoon', 'NamespaceManager', 'AlbaManager'):
                arakoon_clusters.add(service.name.replace('arakoon-', ''))

        for cluster in arakoon_clusters:
            logger.info('  Collapsing cluster {0}'.format(cluster))
            contents = EtcdConfiguration.get(ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
            parser = RawConfigParser()
            parser.readfp(StringIO(contents))
            # node name -> ([ip], client port), as read from the cluster's ini-style configuration
            # NOTE(review): the port is passed on as the raw string from the parser - confirm the client accepts that
            nodes = {}
            for node in parser.get('global', 'cluster').split(','):
                node = node.strip()
                nodes[node] = ([parser.get(node, 'ip')], parser.get(node, 'client_port'))
            config = ArakoonClientConfig(str(cluster), nodes)
            for node in nodes:
                logger.info('    Collapsing node: {0}'.format(node))
                client = ArakoonAdminClient(node, config)
                try:
                    # NOTE(review): 2 is presumably the number of tlogs to keep - confirm against the client API
                    client.collapse_tlogs(2)
                except Exception:
                    # Best effort per node, but let KeyboardInterrupt/SystemExit propagate
                    logger.exception('Error during collapsing cluster {0} node {1}'.format(cluster, node))

        logger.info('Arakoon collapse finished')
Пример #5
0
    def get_services():
        """
        Return all services as provided by the service list

        :return: The unmodified result of ServiceList.get_services()
        """
        services = ServiceList.get_services()
        return services
Пример #6
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Loads the configuration of every known cluster, groups the clusters per node ip and then runs
        ``arakoon --collapse-local`` over SSH on each StorageRouter that hosts at least one cluster node.
        Failures are logged per cluster / per StorageRouter and do not stop the remaining work.
        :return: None
        """
        from ovs_extensions.generic.toolbox import ExtensionsToolbox

        GenericController._logger.info('Arakoon collapse started')
        cluster_info = []
        storagerouters = StorageRouterList.get_storagerouters()
        # Outside unit tests the 'cacc' cluster is always included, loaded via the first StorageRouter
        if os.environ.get('RUNNING_UNITTESTS') != 'True':
            cluster_info = [('cacc', storagerouters[0])]

        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = ExtensionsToolbox.remove_prefix(service.name, 'arakoon-')
                # Regular clusters are handled once; the ARAKOON_NAME clusters are added for every service found
                if cluster in cluster_names and cluster not in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST]:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter))
        # node ip -> {'node_id': <node name>, 'clusters': [(cluster, source ip), ...]}
        workload = {}
        cluster_config_map = {}
        for cluster, storagerouter in cluster_info:
            GenericController._logger.debug('  Collecting info for cluster {0}'.format(cluster))
            # Only the ARAKOON_NAME clusters are loaded with an explicit source ip
            ip = storagerouter.ip if cluster in [ARAKOON_NAME, ARAKOON_NAME_UNITTEST] else None
            try:
                config = ArakoonClusterConfig(cluster_id=cluster, source_ip=ip)
                cluster_config_map[cluster] = config
            except Exception:
                # A cluster whose config cannot be loaded is skipped; KeyboardInterrupt/SystemExit still propagate
                GenericController._logger.exception('  Retrieving cluster information on {0} for {1} failed'.format(storagerouter.ip, cluster))
                continue
            for node in config.nodes:
                if node.ip not in workload:
                    # NOTE(review): the node name of the first cluster seen for an ip is reused for all clusters
                    # on that ip - presumably node names are identical across clusters; verify
                    workload[node.ip] = {'node_id': node.name,
                                         'clusters': []}
                workload[node.ip]['clusters'].append((cluster, ip))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, _ in node_workload['clusters']:
                    try:
                        GenericController._logger.debug('  Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                        client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', cluster_config_map[cluster].external_config_path])
                        GenericController._logger.debug('  Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                    except Exception:
                        # Best effort per cluster, but let KeyboardInterrupt/SystemExit propagate
                        GenericController._logger.exception('  Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
            except UnableToConnectException:
                GenericController._logger.error('  Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))
        GenericController._logger.info('Arakoon collapse finished')
Пример #7
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Loads the configuration of the 'cacc' cluster and of every internal Arakoon, namespace manager and ALBA
        manager cluster, groups the clusters per node ip and then runs ``arakoon --collapse-local`` over SSH on each
        StorageRouter that hosts at least one cluster node.
        A failing collapse or an unreachable StorageRouter is logged and does not stop the remaining work.
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        storagerouters = StorageRouterList.get_storagerouters()
        # Entries are (cluster name, StorageRouter to load the config from, config lives on the filesystem)
        cluster_info = [('cacc', storagerouters[0], True)]
        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                if cluster in cluster_names:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter, False))
        # node ip -> {'node_id': <node name>, 'clusters': [(cluster, filesystem), ...]}
        workload = {}
        for cluster, storagerouter, filesystem in cluster_info:
            ScheduledTaskController._logger.debug('  Collecting info for cluster {0}'.format(cluster))
            config = ArakoonClusterConfig(cluster, filesystem=filesystem)
            config.load_config(storagerouter.ip)
            for node in config.nodes:
                if node.ip not in workload:
                    # NOTE(review): the node name of the first cluster seen for an ip is reused for all clusters
                    # on that ip - presumably node names are identical across clusters; verify
                    workload[node.ip] = {'node_id': node.name,
                                         'clusters': []}
                workload[node.ip]['clusters'].append((cluster, filesystem))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, filesystem in node_workload['clusters']:
                    try:
                        ScheduledTaskController._logger.debug('  Collapsing cluster {0} on {1}'.format(cluster, storagerouter.ip))
                        if filesystem is True:
                            config_path = ArakoonClusterConfig.CONFIG_FILE.format(cluster)
                        else:
                            config_path = Configuration.get_configuration_path(ArakoonClusterConfig.CONFIG_KEY.format(cluster))
                        client.run(['arakoon', '--collapse-local', node_workload['node_id'], '2', '-config', config_path])
                        ScheduledTaskController._logger.info('  Collapsing cluster {0} on {1} completed'.format(cluster, storagerouter.ip))
                    except Exception:
                        # Best effort per cluster, but let KeyboardInterrupt/SystemExit propagate
                        ScheduledTaskController._logger.exception('  Collapsing cluster {0} on {1} failed'.format(cluster, storagerouter.ip))
            except UnableToConnectException:
                ScheduledTaskController._logger.error('  Could not collapse any cluster on {0} (not reachable)'.format(storagerouter.name))

        ScheduledTaskController._logger.info('Arakoon collapse finished')
Пример #8
0
    def sync_vdisk_to_reality(vdisk):
        """
        Align the modelled MDS junctions of a vDisk with its metadata backend config (hypervisor excluded)

        The first config entry is treated as the master. Junctions matching another entry are flagged as non-master,
        junctions matching no entry are deleted, and entries without a junction get a new one when a service can be
        found for their ip/port.
        :param vdisk: vDisk to synchronize
        :type vdisk: VDisk

        :return: None
        """
        vdisk.reload_client('storagedriver')
        vdisk.invalidate_dynamics(['info'])
        config = vdisk.info['metadata_backend_config']

        # ip -> ports according to the metadata backend config
        configured = {}
        for entry in config:
            configured.setdefault(entry['ip'], []).append(entry['port'])

        # ip -> ports of the junctions that are kept
        modelled = {}
        for junction in vdisk.mds_services:
            service = junction.mds_service.service
            storagerouter = service.storagerouter
            if config[0]['ip'] == storagerouter.ip and config[0]['port'] == service.ports[0]:
                junction.is_master = True
            elif storagerouter.ip in configured and service.ports[0] in configured[storagerouter.ip]:
                junction.is_master = False
            else:
                # The junction does not match any configured entry anymore
                junction.delete()
                continue
            junction.save()
            modelled.setdefault(storagerouter.ip, []).append(service.ports[0])

        # Configured ip/port pairs for which no junction was kept above
        unmodelled = [(ip, port)
                      for ip, ports in configured.iteritems()
                      for port in ports
                      if ip not in modelled or port not in modelled[ip]]
        for ip, port in unmodelled:
            service = ServiceList.get_by_ip_ports(ip, [port])
            if service is None:
                continue
            new_junction = MDSServiceVDisk()
            new_junction.vdisk = vdisk
            new_junction.mds_service = service.mds_service
            new_junction.is_master = config[0]['ip'] == service.storagerouter.ip and config[0]['port'] == service.ports[0]
            new_junction.save()
Пример #9
0
    def sync_vdisk_to_reality(vdisk):
        """
        Bring the MDS junctions modelled for a vDisk in line with its metadata backend config (except hypervisor)

        Junctions are flagged as master when they match the first config entry and as non-master when they match any
        other entry; all others are removed. Config entries without a junction get one if a matching service exists.
        :param vdisk: vDisk to synchronize
        :type vdisk: VDisk

        :return: None
        """
        vdisk.reload_client("storagedriver")
        vdisk.invalidate_dynamics(["info"])
        config = vdisk.info["metadata_backend_config"]

        def _is_master_service(candidate):
            # The first config entry identifies the master
            return config[0]["ip"] == candidate.storagerouter.ip and config[0]["port"] == candidate.ports[0]

        # ip -> ports as listed in the config
        ports_by_ip = {}
        for item in config:
            ports_by_ip.setdefault(item["ip"], []).append(item["port"])

        # ip -> ports of the junctions that survive the sync
        kept = {}
        for junction in vdisk.mds_services:
            service = junction.mds_service.service
            storagerouter = service.storagerouter
            if _is_master_service(service):
                master = True
            elif storagerouter.ip in ports_by_ip and service.ports[0] in ports_by_ip[storagerouter.ip]:
                master = False
            else:
                junction.delete()
                continue
            junction.is_master = master
            junction.save()
            kept.setdefault(storagerouter.ip, []).append(service.ports[0])

        for ip, ports in ports_by_ip.iteritems():
            known_ports = kept.get(ip, [])
            for port in ports:
                if port in known_ports:
                    continue
                service = ServiceList.get_by_ip_ports(ip, [port])
                if service is None:
                    continue
                mds_service_vdisk = MDSServiceVDisk()
                mds_service_vdisk.vdisk = vdisk
                mds_service_vdisk.mds_service = service.mds_service
                mds_service_vdisk.is_master = _is_master_service(service)
                mds_service_vdisk.save()
    def ovs_4509_validate_arakoon_collapse_test():
        """
        Validate arakoon collapse

        For every internal Arakoon-type cluster on every StorageRouter: run a benchmark when there are at most
        2 tlogs, trigger the collapse and assert that at most 2 tlogs remain and that the timestamp of head.db
        changed.
        """
        storage_router_ips = sorted(sr.ip for sr in GeneralStorageRouter.get_storage_routers())
        for node_ip in storage_router_ips:
            root_client = SSHClient(node_ip, username='******')
            arakoon_clusters = [service.name.replace('arakoon-', '')
                                for service in ServiceList.get_services()
                                if service.is_internal is True and service.storagerouter.ip == node_ip and
                                service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                                      ServiceType.SERVICE_TYPES.NS_MGR,
                                                      ServiceType.SERVICE_TYPES.ALBA_MGR)]

            for arakoon_cluster in arakoon_clusters:
                config_key = '/ovs/arakoon/{0}/config'.format(arakoon_cluster)
                arakoon_config_path = Configuration.get_configuration_path(config_key)

                # Default tlog location, overruled by the last 'tlog_dir' line of the cluster config (if any)
                tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(arakoon_cluster)
                with remote(node_ip, [Configuration]) as rem:
                    config_contents = rem.Configuration.get(config_key, raw=True)
                tlog_dir_lines = [line for line in config_contents.splitlines() if 'tlog_dir' in line]
                if tlog_dir_lines:
                    tlog_location = tlog_dir_lines[-1].split()[-1]
                head_db_path = tlog_location + '/head.db'
                stat_command = ['stat', '--format=%Y', head_db_path]

                nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
                if root_client.file_exists(head_db_path):
                    old_headdb_timestamp = root_client.run(stat_command)
                else:
                    old_headdb_timestamp = 0
                if nr_of_tlogs <= 2:
                    # Run a benchmark first when there are at most 2 tlogs
                    root_client.run(['arakoon', '--benchmark', '-n_clients', '1', '-max_n', '5_000', '-config', arakoon_config_path])

                GenericController.collapse_arakoon()

                nr_of_tlogs = TestArakoon.get_nr_of_tlogs_in_folder(root_client, tlog_location)
                new_headdb_timestamp = root_client.run(stat_command)
                assert nr_of_tlogs <= 2,\
                    'Arakoon collapse left {0} tlogs on the environment, expecting less than 2'.format(nr_of_tlogs)
                assert old_headdb_timestamp != new_headdb_timestamp,\
                    'Timestamp of the head_db file was not changed in the process of collapsing tlogs'
Пример #11
0
    def sync_vdisk_to_reality(vdisk):
        """
        Make the MDS junctions of a vDisk reflect its metadata backend config (except hypervisor)

        The first config entry identifies the master. Junctions matching no entry are deleted and entries without a
        junction get a new one when a service is found for their ip/port.
        """

        vdisk.reload_client()
        vdisk.invalidate_dynamics(['info'])
        config = vdisk.info['metadata_backend_config']

        # ip -> ports as listed in the config
        config_dict = {}
        for item in config:
            ports = config_dict.get(item['ip'])
            if ports is None:
                ports = config_dict[item['ip']] = []
            ports.append(item['port'])

        # ip -> ports of the junctions that are kept
        mds_dict = {}

        def _remember(ip, port):
            if ip not in mds_dict:
                mds_dict[ip] = []
            mds_dict[ip].append(port)

        for junction in vdisk.mds_services:
            service = junction.mds_service.service
            storagerouter = service.storagerouter
            is_master = config[0]['ip'] == storagerouter.ip and config[0]['port'] == service.ports[0]
            if not is_master:
                is_slave = storagerouter.ip in config_dict and service.ports[0] in config_dict[storagerouter.ip]
                if not is_slave:
                    # Not part of the config at all: drop the junction
                    junction.delete()
                    continue
            junction.is_master = True if is_master else False
            junction.save()
            _remember(storagerouter.ip, service.ports[0])

        for ip, ports in config_dict.iteritems():
            for port in ports:
                already_modelled = ip in mds_dict and port in mds_dict[ip]
                if already_modelled:
                    continue
                service = ServiceList.get_by_ip_ports(ip, [port])
                if service is None:
                    continue
                mds_service_vdisk = MDSServiceVDisk()
                mds_service_vdisk.vdisk = vdisk
                mds_service_vdisk.mds_service = service.mds_service
                mds_service_vdisk.is_master = config[0]['ip'] == service.storagerouter.ip and config[0]['port'] == service.ports[0]
                mds_service_vdisk.save()
Пример #12
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Collects the cluster names of all internal Arakoon, namespace
        manager and ALBA manager services, loads each cluster's
        configuration from Etcd and requests a collapse on every node listed
        in that configuration. An error on a single node is logged and does
        not abort the remaining nodes or clusters.
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        arakoon_clusters = []
        for service in ServiceList.get_services():
            if service.is_internal is True and \
               service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                     ServiceType.SERVICE_TYPES.NS_MGR,
                                     ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                # Several services can map to the same cluster name. Every
                # node of a cluster is collapsed below anyway, so each
                # cluster only needs to be handled once.
                if cluster not in arakoon_clusters:
                    arakoon_clusters.append(cluster)

        for cluster in arakoon_clusters:
            ScheduledTaskController._logger.info(
                '  Collapsing cluster {0}'.format(cluster))
            contents = EtcdConfiguration.get(
                ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
            parser = RawConfigParser()
            parser.readfp(StringIO(contents))
            # node name -> ([ip], client port), as read from the cluster's
            # ini-style configuration
            nodes = {}
            for node in parser.get('global', 'cluster').split(','):
                node = node.strip()
                nodes[node] = ([str(parser.get(node, 'ip'))],
                               int(parser.get(node, 'client_port')))
            config = ArakoonClientConfig(str(cluster), nodes)
            for node in nodes:
                ScheduledTaskController._logger.info(
                    '    Collapsing node: {0}'.format(node))
                client = ArakoonAdmin(config)
                try:
                    # NOTE(review): 2 is presumably the number of tlogs to
                    # keep - confirm against the client API
                    client.collapse(str(node), 2)
                except Exception:
                    # Best effort per node, but let
                    # KeyboardInterrupt/SystemExit propagate
                    ScheduledTaskController._logger.exception(
                        'Error during collapsing cluster {0} node {1}'.format(
                            cluster, node))

        ScheduledTaskController._logger.info('Arakoon collapse finished')
Пример #13
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Collects the cluster names of all services of type 'Arakoon',
        'NamespaceManager' or 'AlbaManager', loads each cluster's
        configuration from Etcd and requests a tlog collapse on every node
        listed in that configuration. An error on a single node is logged
        and does not abort the remaining nodes or clusters.
        :return: None
        """
        logger.info('Starting arakoon collapse')
        # Only the cluster names are used; a set also drops duplicates
        # caused by several services per cluster
        arakoon_clusters = set()
        for service in ServiceList.get_services():
            if service.type.name in ('Arakoon', 'NamespaceManager',
                                     'AlbaManager'):
                arakoon_clusters.add(service.name.replace('arakoon-', ''))

        for cluster in arakoon_clusters:
            logger.info('  Collapsing cluster {0}'.format(cluster))
            contents = EtcdConfiguration.get(
                ArakoonClusterConfig.ETCD_CONFIG_KEY.format(cluster), raw=True)
            parser = RawConfigParser()
            parser.readfp(StringIO(contents))
            # node name -> ([ip], client port), as read from the cluster's
            # ini-style configuration
            # NOTE(review): the port is passed on as the raw string from the
            # parser - confirm the client accepts that
            nodes = {}
            for node in parser.get('global', 'cluster').split(','):
                node = node.strip()
                nodes[node] = ([parser.get(node, 'ip')],
                               parser.get(node, 'client_port'))
            config = ArakoonClientConfig(str(cluster), nodes)
            for node in nodes:
                logger.info('    Collapsing node: {0}'.format(node))
                client = ArakoonAdminClient(node, config)
                try:
                    # NOTE(review): 2 is presumably the number of tlogs to
                    # keep - confirm against the client API
                    client.collapse_tlogs(2)
                except Exception:
                    # Best effort per node, but let
                    # KeyboardInterrupt/SystemExit propagate
                    logger.exception(
                        'Error during collapsing cluster {0} node {1}'.format(
                            cluster, node))

        logger.info('Arakoon collapse finished')
Пример #14
0
    def collapse_arakoon():
        """
        Collapse Arakoon's Tlogs

        Loads the configuration of the 'cacc' cluster and of every internal
        Arakoon, namespace manager and ALBA manager cluster, groups the
        clusters per node ip and then runs ``arakoon --collapse-local`` over
        SSH on each StorageRouter that hosts at least one cluster node.
        A failing collapse or an unreachable StorageRouter is logged and
        does not stop the remaining work.
        :return: None
        """
        ScheduledTaskController._logger.info('Starting arakoon collapse')
        storagerouters = StorageRouterList.get_storagerouters()
        # Entries are (cluster name, StorageRouter to load the config from,
        # config lives on the filesystem)
        cluster_info = [('cacc', storagerouters[0], True)]
        cluster_names = []
        for service in ServiceList.get_services():
            if service.is_internal is True and service.type.name in (
                    ServiceType.SERVICE_TYPES.ARAKOON,
                    ServiceType.SERVICE_TYPES.NS_MGR,
                    ServiceType.SERVICE_TYPES.ALBA_MGR):
                cluster = service.name.replace('arakoon-', '')
                if cluster in cluster_names:
                    continue
                cluster_names.append(cluster)
                cluster_info.append((cluster, service.storagerouter, False))
        # node ip -> {'node_id': <node name>,
        #             'clusters': [(cluster, filesystem), ...]}
        workload = {}
        for cluster, storagerouter, filesystem in cluster_info:
            ScheduledTaskController._logger.debug(
                '  Collecting info for cluster {0}'.format(cluster))
            config = ArakoonClusterConfig(cluster, filesystem=filesystem)
            config.load_config(storagerouter.ip)
            for node in config.nodes:
                if node.ip not in workload:
                    # NOTE(review): the node name of the first cluster seen
                    # for an ip is reused for all clusters on that ip -
                    # presumably node names are identical across clusters
                    workload[node.ip] = {'node_id': node.name, 'clusters': []}
                workload[node.ip]['clusters'].append((cluster, filesystem))
        for storagerouter in storagerouters:
            try:
                if storagerouter.ip not in workload:
                    continue
                node_workload = workload[storagerouter.ip]
                client = SSHClient(storagerouter)
                for cluster, filesystem in node_workload['clusters']:
                    try:
                        ScheduledTaskController._logger.debug(
                            '  Collapsing cluster {0} on {1}'.format(
                                cluster, storagerouter.ip))
                        if filesystem is True:
                            config_path = ArakoonClusterConfig.CONFIG_FILE.format(
                                cluster)
                        else:
                            config_path = Configuration.get_configuration_path(
                                ArakoonClusterConfig.CONFIG_KEY.format(
                                    cluster))
                        client.run([
                            'arakoon', '--collapse-local',
                            node_workload['node_id'], '2', '-config',
                            config_path
                        ])
                        ScheduledTaskController._logger.info(
                            '  Collapsing cluster {0} on {1} completed'.format(
                                cluster, storagerouter.ip))
                    except Exception:
                        # Best effort per cluster, but let
                        # KeyboardInterrupt/SystemExit propagate
                        ScheduledTaskController._logger.exception(
                            '  Collapsing cluster {0} on {1} failed'.format(
                                cluster, storagerouter.ip))
            except UnableToConnectException:
                ScheduledTaskController._logger.error(
                    '  Could not collapse any cluster on {0} (not reachable)'.
                    format(storagerouter.name))

        ScheduledTaskController._logger.info('Arakoon collapse finished')
    def get_backend_stats():
        """
        Send backend stats for each backend to InfluxDB

        For every ALBA backend one point is built holding the 'multi_get' and 'apply' counters plus the number of
        ASDs per status. The points are handed to StatsmonkeyScheduledTaskController._send_stats.
        :return: The list of points that was sent, or None when nothing could be collected
        """
        points = []
        abms = []
        alba_node_ids = []  # formerly named `abs`, which shadowed the builtin

        for service in ServiceList.get_services():
            if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                abms.append(service.name)

        for alba_node in AlbaNodeList.get_albanodes():
            alba_node_ids.append(alba_node.node_id)

        abms = list(set(abms))
        if len(abms) == 0:
            # Without an ALBA manager service there is no config to query (this used to raise an IndexError)
            StatsmonkeyScheduledTaskController._logger.info("No ALBA manager services found")
            return None

        config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(abms[0])
        try:
            decommissioning_osds = AlbaCLI.run('list-decommissioning-osds', config=config, to_json=True)
        except Exception as ex:
            StatsmonkeyScheduledTaskController._logger.error('{0}'.format(ex.message))
            return None

        # Decommissioning OSDs that belong to one of the known ALBA nodes
        filtered_osds = []
        for node_id in alba_node_ids:
            filtered_osds += [osd for osd in decommissioning_osds if osd['node_id'] == node_id]

        for ab in AlbaBackendList.get_albabackends():
            try:
                stat = {
                    'measurement': 'backend_stats',
                    'tags': {
                        'backend_name': ab.name
                    },
                    'fields': {
                        'gets': ab.statistics['multi_get']['n'],
                        'puts': ab.statistics['apply']['n']
                    }
                }
                # NOTE(review): the decommissioning count is queried through the first ALBA manager only and is
                # reused for every backend - confirm this is intended
                stat_asd = {
                    'decommissioning': len(filtered_osds),
                    'decommissioned': 0,
                    'claimed': 0,
                    'warning': 0,
                    'failure': 0,
                    'error': 0
                }

                for disks in ab.local_stack.values():
                    for disk in disks.values():
                        for asd in disk['asds'].values():
                            if asd['alba_backend_guid'] == ab.guid:
                                status = asd['status']
                                status_detail = asd['status_detail']
                                # A 'decommissioned' detail takes precedence over the plain status
                                if status_detail == 'decommissioned':
                                    status = status_detail
                                if status not in stat_asd:
                                    stat_asd[status] = 0
                                stat_asd[status] += 1

                for status in stat_asd:
                    stat['fields'][status] = stat_asd[status]
                points.append(stat)
            except Exception as ex:
                # A backend whose stats cannot be read is skipped
                StatsmonkeyScheduledTaskController._logger.error(ex.message)

        if len(points) == 0:
            StatsmonkeyScheduledTaskController._logger.info("No statistics found")
            return None

        StatsmonkeyScheduledTaskController._send_stats(points)
        return points
    def get_disk_safety():
        """
        Send disk safety for each vpool and the amount of namespaces with the lowest disk safety

        For every ALBA backend whose first ABM service is a known ALBA manager, 'get-disk-safety' is queried and one
        point is built per preset that has an active, in-use policy. The point counts the entries at the maximum
        safety, at the lowest reported safety and in between.
        :return: The list of points that was sent, or None when no point could be built
        """
        points = []
        abms = []

        for service in ServiceList.get_services():
            if service.type.name == ServiceType.SERVICE_TYPES.ALBA_MGR:
                abms.append(service.name)

        abms = list(set(abms))
        abl = AlbaBackendList.get_albabackends()
        for ab in abl:
            service_name = Service(ab.abm_services[0].service_guid).name
            if service_name not in abms:
                continue

            config = "etcd://127.0.0.1:2379/ovs/arakoon/{}/config".format(service_name)

            try:
                disk_safety = AlbaCLI.run('get-disk-safety', config=config, to_json=True)
            except Exception as ex:
                StatsmonkeyScheduledTaskController._logger.error('{0}: {1}'.format(service_name, ex.message))
                continue

            presets = ab.presets
            for preset in presets:
                # Reset per preset: the parsed policy of an earlier preset used to leak into the next iteration,
                # where calling .replace() on that list raised an AttributeError for presets without active policy
                used_preset = None
                try:
                    policies = preset['policy_metadata']
                    for policy in policies:
                        if policies[policy]['is_active'] and policies[policy]['in_use']:
                            used_preset = policy

                    if used_preset is None:
                        continue

                    # The policy key is a tuple-like string; after conversion to a JSON list its second element is
                    # used as the maximum disk safety
                    used_preset = json.loads(used_preset.replace('(', '[').replace(')', ']'))
                    max_disk_safety = used_preset[1]

                    safety = {
                        'measurement': 'disk_safety',
                        'tags': {
                            'backend_name': ab.name,
                            'max_disk_safety': max_disk_safety,
                            'min_disk_safety': max_disk_safety
                        },
                        'fields': {
                            'amount_max_disk_safety': 0,
                            'amount_between_disk_safety': 0,
                            'amount_min_disk_safety': 0
                        }
                    }
                    # safety value -> number of entries reporting that value (entries without safety are ignored)
                    stats = {}
                    for disk in disk_safety:
                        if disk['safety'] is not None:
                            if disk['safety'] not in stats:
                                stats[disk['safety']] = 0
                            stats[disk['safety']] += 1
                    # Raises ValueError when nothing reported a safety; handled by the except below
                    min_disk_safety = min(stats.keys())
                    safety['tags']['min_disk_safety'] = min_disk_safety

                    for stat in stats:
                        if stat == max_disk_safety:
                            safety['fields']['amount_max_disk_safety'] = stats[stat]
                        elif stat == min_disk_safety:
                            safety['fields']['amount_min_disk_safety'] = stats[stat]
                        else:
                            safety['fields']['amount_between_disk_safety'] += stats[stat]

                    points.append(safety)
                except Exception as ex:
                    StatsmonkeyScheduledTaskController._logger.error(ex.message)

        if len(points) == 0:
            StatsmonkeyScheduledTaskController._logger.info("No statistics found")
            return

        StatsmonkeyScheduledTaskController._send_stats(points)
        return points
Пример #17
0
    def _sync_vdisk_to_reality(cls, vdisk):
        """
        Align the modeled MDS junction services of a vDisk with the MDS services the StorageDriver reports
        Junctions that are duplicated or unknown to the StorageDriver are deleted, the remaining junctions get
        their master flag refreshed and junctions that are missing in the model are created
        :param vdisk: vDisk to synchronize
        :type vdisk: ovs.dal.hybrids.vdisk.VDisk
        :return: None
        :rtype: NoneType
        """
        cls._logger.info('vDisk {0} - {1}: Syncing to reality'.format(vdisk.guid, vdisk.name))

        vdisk.reload_client('storagedriver')
        vdisk.invalidate_dynamics(['info', 'storagerouter_guid'])

        # Gather the MDS services as known by the StorageDriver, the first entry being the master
        master_ip = None
        master_port = None
        storagedriver_ports = collections.OrderedDict()  # IP -> ports according to StorageDriver
        cls._logger.debug('vDisk {0} - {1}: Current MDS Config: {2}'.format(vdisk.guid, vdisk.name, vdisk.info['metadata_backend_config']))
        for position, entry in enumerate(vdisk.info['metadata_backend_config']):
            entry_ip, entry_port = entry['ip'], entry['port']
            if position == 0:
                master_ip, master_port = entry_ip, entry_port
            storagedriver_ports.setdefault(entry_ip, []).append(entry_port)

        # Walk over the modeled junctions (relations between MDS services and the vDisk) and clean them up
        modeled_ports = collections.OrderedDict()  # IP -> ports according to model
        for junction in list(vdisk.mds_services):
            junction_ip = junction.mds_service.service.storagerouter.ip
            junction_port = junction.mds_service.service.ports[0]
            cls._logger.debug('vDisk {0} - {1}: Validating junction service {2}:{3}'.format(vdisk.guid, vdisk.name, junction_ip, junction_port))

            # A junction that has been seen before is a duplicate
            if junction_port in modeled_ports.get(junction_ip, ()):
                cls._logger.warning('vDisk {0} - {1}: Deleting junction service {2}:{3} : Duplicate'.format(vdisk.guid, vdisk.name, junction_ip, junction_port))
                junction.delete()
                continue

            # A junction the StorageDriver does not know about is obsolete
            if junction_port not in storagedriver_ports.get(junction_ip, ()):
                cls._logger.warning('vDisk {0} - {1}: Deleting junction service {2}:{3} : Unknown by StorageDriver'.format(vdisk.guid, vdisk.name, junction_ip, junction_port))
                junction.delete()
                continue

            junction.is_master = junction_ip == master_ip and junction_port == master_port
            junction.save()
            modeled_ports.setdefault(junction_ip, []).append(junction_port)

        model_overview = ', '.join('{0}:{1}'.format(ip, port) for ip, ports in modeled_ports.iteritems() for port in ports)
        storagedriver_overview = ', '.join('{0}:{1}'.format(ip, port) for ip, ports in storagedriver_ports.iteritems() for port in ports)
        cls._logger.debug('vDisk {0} - {1}: MDS services according to model: {2}'.format(vdisk.guid, vdisk.name, model_overview))
        cls._logger.debug('vDisk {0} - {1}: MDS services according to StorageDriver: {2}'.format(vdisk.guid, vdisk.name, storagedriver_overview))

        # Model every service the StorageDriver knows about but which has no junction yet
        for ip, ports in storagedriver_ports.iteritems():
            for port in ports:
                if port in modeled_ports.get(ip, ()):
                    continue

                cls._logger.debug('vDisk {0} - {1}: Modeling junction service {2}:{3}'.format(vdisk.guid, vdisk.name, ip, port))
                service = ServiceList.get_by_ip_ports(ip, [port])
                if service is None and vdisk.storagerouter_guid is not None:
                    cls._logger.critical('vDisk {0} - {1}: Failed to find an MDS Service for {2}:{3}. Creating a new MDS Service'.format(vdisk.guid, vdisk.name, ip, port))
                    storagerouter = StorageRouter(vdisk.storagerouter_guid)
                    try:
                        service = cls.prepare_mds_service(storagerouter=storagerouter, vpool=vdisk.vpool).service
                    except Exception:
                        cls._logger.exception('vDisk {0} - {1}: Creating MDS Service failed'.format(vdisk.guid, vdisk.name))

                if service is None:
                    continue

                new_junction = MDSServiceVDisk()
                new_junction.vdisk = vdisk
                new_junction.mds_service = service.mds_service
                new_junction.is_master = master_ip == service.storagerouter.ip and master_port == service.ports[0]
                new_junction.save()
                cls._logger.debug('vDisk {0} - {1}: Modeled junction service {2}:{3}'.format(vdisk.guid, vdisk.name, ip, port))
        cls._logger.info('vDisk {0} - {1}: Synced to reality'.format(vdisk.guid, vdisk.name))
Пример #18
0
    def _voldrv_arakoon_checkup(create_cluster):
        """
        Create and/or extend the 'voldrv' Arakoon cluster over the master StorageRouters with a DB partition
        When no 'arakoon-voldrv' service is modeled yet and creation is requested, the cluster is created on one
        of the available masters. Afterwards the cluster is extended to every available master that is not part
        of it yet. Each change is followed by reconfiguring the volumedriver.
        :param create_cluster: When True, a new cluster is created if no 'arakoon-voldrv' service exists yet
        :return: None
        """
        cluster_name = 'voldrv'
        service_name = 'arakoon-voldrv'
        service_type = ServiceTypeList.get_by_name('Arakoon')

        def _model_service(owner, install_result):
            """
            Model and save the 'arakoon-voldrv' service for the given StorageRouter
            """
            modeled = Service()
            modeled.name = service_name
            modeled.type = service_type
            modeled.ports = [install_result['client_port'], install_result['messaging_port']]
            modeled.storagerouter = owner
            modeled.save()
            return modeled

        # Services (and the IPs they run on) that already belong to the cluster
        current_services = [candidate for candidate in service_type.services if candidate.name == service_name]
        current_ips = [candidate.storagerouter.ip for candidate in current_services]

        # All StorageRouter IPs (slaves first, then masters) and the masters that can host the cluster
        all_sr_ips = [slave.ip for slave in StorageRouterList.get_slaves()]
        available_storagerouters = {}
        for master in StorageRouterList.get_masters():
            master.invalidate_dynamics(['partition_config'])
            db_partitions = master.partition_config[DiskPartition.ROLES.DB]
            if len(db_partitions) > 0:
                available_storagerouters[master] = DiskPartition(db_partitions[0])
            all_sr_ips.append(master.ip)

        if create_cluster is True and len(current_services) == 0 and len(available_storagerouters) > 0:
            first_sr, first_partition = available_storagerouters.items()[0]
            creation_result = ArakoonInstaller.create_cluster(cluster_name=cluster_name,
                                                              ip=first_sr.ip,
                                                              exclude_ports=ServiceList.get_ports_for_ip(first_sr.ip),
                                                              base_dir=first_partition.folder)
            current_services.append(_model_service(first_sr, creation_result))
            for target_ip in [ip for ip in all_sr_ips if ip not in current_ips]:
                ArakoonInstaller.deploy_to_slave(first_sr.ip, target_ip, cluster_name)
            ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, first_sr.ip)
            current_ips.append(first_sr.ip)
            StorageDriverController._configure_arakoon_to_volumedriver()

        if 0 < len(current_services) < len(available_storagerouters):
            distributed = False
            for candidate_sr, candidate_partition in available_storagerouters.iteritems():
                if candidate_sr.ip in current_ips:
                    continue
                extension_result = ArakoonInstaller.extend_cluster(current_services[0].storagerouter.ip,
                                                                   candidate_sr.ip,
                                                                   cluster_name,
                                                                   ServiceList.get_ports_for_ip(candidate_sr.ip),
                                                                   candidate_partition.folder)
                _model_service(candidate_sr, extension_result)
                current_ips.append(candidate_sr.ip)
                # The configuration is only deployed to the non-member nodes once, after the first extension
                if not distributed:
                    distributed = True
                    for target_ip in [ip for ip in all_sr_ips if ip not in current_ips]:
                        ArakoonInstaller.deploy_to_slave(current_services[0].storagerouter.ip, target_ip, cluster_name)
                ArakoonInstaller.restart_cluster_add(cluster_name, current_ips, candidate_sr.ip)
            StorageDriverController._configure_arakoon_to_volumedriver()
Пример #19
0
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id,
                     configure_memcached, configure_rabbitmq):
        """
        Promotes a given node to a master node

        The StorageRouter is marked as 'MASTER', the node joins the Arakoon cluster(s), memcached and RabbitMQ
        are optionally configured, the required services are (re)started and finally the promotion is flagged
        as completed in the configuration.

        :param cluster_ip: IP of the node being promoted; used as key in ip_client_map and to build the endpoints
        :param master_ip: IP of an existing master node; used as key in ip_client_map and to load the 'config' cluster
        :param ip_client_map: Mapping of node IP to a client for that node (presumably SSHClient instances - confirm against caller)
        :param unique_id: Machine ID used to look up the StorageRouter of the node being promoted
        :param configure_memcached: When True, memcached is validated and configured and its endpoint is registered
        :param configure_rabbitmq: When True, RabbitMQ is configured, joined to the master's cluster and its endpoint is registered
        :raises RuntimeError: When configure_memcached is True and not all memcache nodes can be reached, or
                              when the OVS DB Arakoon cluster has no node other than the one being promoted
        :return: None
        """
        # Imported locally instead of at module level (presumably to avoid import cycles - confirm)
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promoting node',
                    title=True)
        service_manager = ServiceFactory.get_manager()
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(
                    ip_client_map) is False:
                raise RuntimeError(
                    'Not all memcache nodes can be reached which is required for promoting a node.'
                )

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        # Only when no external configuration is set, the node joins the 'config' Arakoon cluster
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon configuration cluster')
            arakoon_installer = ArakoonInstaller(cluster_name='config')
            arakoon_installer.load(ip=master_ip)
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            service_manager.register_service(
                node_name=machine_id,
                service_metadata=arakoon_installer.service_metadata[cluster_ip]
            )

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(
            Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(
            cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name)
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError(
                'There should be at least one other master node')

        # The OVS DB cluster is only extended when its metadata marks it as internal
        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Joining Arakoon OVS DB cluster')
            arakoon_installer = ArakoonInstaller(
                cluster_name=arakoon_cluster_name)
            arakoon_installer.load()
            arakoon_installer.extend_cluster(
                new_ip=cluster_ip,
                base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            arakoon_installer.restart_cluster_after_extending(
                new_ip=cluster_ip)
            arakoon_ports = arakoon_installer.ports[cluster_ip]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(
                client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client,
                                        node_type='master',
                                        logger=NodeTypeController._logger)

        # Register the endpoints of the new node, only writing the configuration when something changed
        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints',
                                  endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get(
                '/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints',
                                  endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting master node services')
            # Clear the store references held by the factories
            # NOTE(review): presumably forces new clients that know about the extended cluster - confirm
            PersistentFactory.store = None
            VolatileFactory.store = None

            # Model the 'arakoon-ovsdb' service for this node, unless a service with that name already
            # exists that is either not internal or runs on this node
            if 'arakoon-ovsdb' not in [
                    s.name for s in ServiceList.get_services() if
                    s.is_internal is False or s.storagerouter.ip == cluster_ip
            ]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(
                    ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(
                client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Copying RabbitMQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            # 0400 is a Python 2 octal literal
            target_client.file_chmod(rabbitmq_cookie_file, mode=0400)
            # Start RabbitMQ detached, stop its application, join the master's cluster and stop it again;
            # every step is followed by a fixed 5 second pause
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run([
                'rabbitmqctl', 'join_cluster',
                'rabbit@{0}'.format(master_hostname)
            ])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            ServiceFactory.change_service_state(target_client,
                                                'rabbitmq-server', 'start',
                                                NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(
                client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server']
        # An internally managed 'arakoon-ovsdb' is not started here
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if service_manager.has_service(service, client=target_client):
                ServiceFactory.change_service_state(target_client, service,
                                                    'start',
                                                    NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(
            clients=ip_client_map, logger=NodeTypeController._logger)

        # Restart once more when running the 'promote' hooks returns a truthy value
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger,
                        messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(
                clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(
                client=target_client,
                logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(
                client=target_client,
                node_name=node_name,
                node_type='master',
                logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id),
                          'MASTER')
        target_client.run(
            ['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set(
            '/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id),
            True)

        # Remove the rollback marker file if it is present
        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger,
                    messages='Promote complete')
Пример #20
0
    def promote_node(cluster_ip, master_ip, ip_client_map, unique_id, configure_memcached, configure_rabbitmq):
        """
        Promotes a given node to a master node

        The StorageRouter is marked as 'MASTER', the node joins the configuration cluster (Arakoon or Etcd,
        depending on the configuration store) and the OVS DB Arakoon cluster, memcached and RabbitMQ are
        optionally configured, the required services are (re)started and finally the promotion is flagged as
        completed in the configuration.

        :param cluster_ip: IP of the node being promoted; used as key in ip_client_map and to build the endpoints
        :param master_ip: IP of an existing master node; used as key in ip_client_map and when extending the clusters
        :param ip_client_map: Mapping of node IP to a client for that node (presumably SSHClient instances - confirm against caller)
        :param unique_id: Machine ID used to look up the StorageRouter of the node being promoted
        :param configure_memcached: When True, memcached is validated and configured and its endpoint is registered
        :param configure_rabbitmq: When True, RabbitMQ is configured, joined to the master's cluster and its endpoint is registered
        :raises RuntimeError: When configure_memcached is True and not all memcache nodes can be reached, or
                              when the OVS DB Arakoon cluster has no node other than the one being promoted
        :return: None
        """
        # Imported locally instead of at module level (presumably to avoid import cycles - confirm)
        from ovs.dal.lists.storagerouterlist import StorageRouterList
        from ovs.dal.lists.servicetypelist import ServiceTypeList
        from ovs.dal.lists.servicelist import ServiceList
        from ovs.dal.hybrids.service import Service

        Toolbox.log(logger=NodeTypeController._logger, messages='Promoting node', title=True)
        if configure_memcached is True:
            if NodeTypeController._validate_local_memcache_servers(ip_client_map) is False:
                raise RuntimeError('Not all memcache nodes can be reached which is required for promoting a node.')

        target_client = ip_client_map[cluster_ip]
        machine_id = System.get_my_machine_id(target_client)
        node_name, _ = target_client.get_hostname()
        master_client = ip_client_map[master_ip]

        storagerouter = StorageRouterList.get_by_machine_id(unique_id)
        storagerouter.node_type = 'MASTER'
        storagerouter.save()

        # Only when no external configuration is set, the node joins the configuration cluster itself
        external_config = Configuration.get('/ovs/framework/external_config')
        if external_config is None:
            config_store = Configuration.get_store()
            if config_store == 'arakoon':
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon configuration cluster')
                metadata = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                           new_ip=cluster_ip,
                                                           cluster_name='config',
                                                           base_dir=Configuration.get('/ovs/framework/paths|ovsdb'),
                                                           ports=[26400, 26401],
                                                           filesystem=True)
                ArakoonInstaller.restart_cluster_add(cluster_name='config',
                                                     current_ips=metadata['ips'],
                                                     new_ip=cluster_ip,
                                                     filesystem=True)
                ServiceManager.register_service(node_name=machine_id,
                                                service_metadata=metadata['service_metadata'])
            else:
                # Any configuration store other than 'arakoon' is handled as Etcd
                from ovs.extensions.db.etcd.installer import EtcdInstaller
                Toolbox.log(logger=NodeTypeController._logger, messages='Joining Etcd cluster')
                EtcdInstaller.extend_cluster(master_ip, cluster_ip, 'config')

        # Find other (arakoon) master nodes
        arakoon_cluster_name = str(Configuration.get('/ovs/framework/arakoon_clusters|ovsdb'))
        arakoon_metadata = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=arakoon_cluster_name)
        config = ArakoonClusterConfig(cluster_id=arakoon_cluster_name, filesystem=False)
        config.load_config()
        master_node_ips = [node.ip for node in config.nodes]
        if cluster_ip in master_node_ips:
            master_node_ips.remove(cluster_ip)
        if len(master_node_ips) == 0:
            raise RuntimeError('There should be at least one other master node')

        # The OVS DB cluster is only extended when its metadata marks it as internal
        arakoon_ports = []
        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Joining Arakoon OVS DB cluster')
            result = ArakoonInstaller.extend_cluster(master_ip=master_ip,
                                                     new_ip=cluster_ip,
                                                     cluster_name=arakoon_cluster_name,
                                                     base_dir=Configuration.get('/ovs/framework/paths|ovsdb'))
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=result['ips'],
                                                 new_ip=cluster_ip, filesystem=False)
            arakoon_ports = [result['client_port'], result['messaging_port']]

        if configure_memcached is True:
            NodeTypeController.configure_memcached(client=target_client, logger=NodeTypeController._logger)
        NodeTypeController.add_services(client=target_client, node_type='master', logger=NodeTypeController._logger)

        # Register the endpoints of the new node, only writing the configuration when something changed
        Toolbox.log(logger=NodeTypeController._logger, messages='Update configurations')
        if configure_memcached is True:
            endpoints = Configuration.get('/ovs/framework/memcache|endpoints')
            endpoint = '{0}:11211'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/memcache|endpoints', endpoints)
        if configure_rabbitmq is True:
            endpoints = Configuration.get('/ovs/framework/messagequeue|endpoints')
            endpoint = '{0}:5672'.format(cluster_ip)
            if endpoint not in endpoints:
                endpoints.append(endpoint)
                Configuration.set('/ovs/framework/messagequeue|endpoints', endpoints)

        if arakoon_metadata['internal'] is True:
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting master node services')
            # NOTE(review): the OVS DB cluster was already restarted right after extending it above; this
            # second restart uses the other master IPs as current_ips - confirm that both are required
            ArakoonInstaller.restart_cluster_add(cluster_name=arakoon_cluster_name,
                                                 current_ips=master_node_ips,
                                                 new_ip=cluster_ip,
                                                 filesystem=False)
            # Clear the store references held by the factories
            # NOTE(review): presumably forces new clients that know about the extended cluster - confirm
            PersistentFactory.store = None
            VolatileFactory.store = None

            # Model the 'arakoon-ovsdb' service for this node, unless a service with that name already
            # exists that is either not internal or runs on this node
            if 'arakoon-ovsdb' not in [s.name for s in ServiceList.get_services() if s.is_internal is False or s.storagerouter.ip == cluster_ip]:
                service = Service()
                service.name = 'arakoon-ovsdb'
                service.type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ARAKOON)
                service.ports = arakoon_ports
                service.storagerouter = storagerouter
                service.save()

        if configure_rabbitmq is True:
            NodeTypeController.configure_rabbitmq(client=target_client, logger=NodeTypeController._logger)
            # Copy rabbitmq cookie
            rabbitmq_cookie_file = '/var/lib/rabbitmq/.erlang.cookie'

            Toolbox.log(logger=NodeTypeController._logger, messages='Copying Rabbit MQ cookie')
            contents = master_client.file_read(rabbitmq_cookie_file)
            master_hostname, _ = master_client.get_hostname()
            target_client.dir_create(os.path.dirname(rabbitmq_cookie_file))
            target_client.file_write(rabbitmq_cookie_file, contents)
            # NOTE(review): mode is the decimal integer 400 here, not an octal literal; whether that is
            # correct depends on how file_chmod interprets its 'mode' argument - confirm
            target_client.file_chmod(rabbitmq_cookie_file, mode=400)
            # Start RabbitMQ detached, stop its application, join the master's cluster and stop it again;
            # every step is followed by a fixed 5 second pause
            target_client.run(['rabbitmq-server', '-detached'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop_app'])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'join_cluster', 'rabbit@{0}'.format(master_hostname)])
            time.sleep(5)
            target_client.run(['rabbitmqctl', 'stop'])
            time.sleep(5)

            # Enable HA for the rabbitMQ queues
            Toolbox.change_service_state(target_client, 'rabbitmq-server', 'start', NodeTypeController._logger)
            NodeTypeController.check_rabbitmq_and_enable_ha_mode(client=target_client, logger=NodeTypeController._logger)

        NodeTypeController._configure_amqp_to_volumedriver()

        Toolbox.log(logger=NodeTypeController._logger, messages='Starting services')
        services = ['memcached', 'arakoon-ovsdb', 'rabbitmq-server', 'etcd-config']
        # An internally managed 'arakoon-ovsdb' is not started here
        if arakoon_metadata['internal'] is True:
            services.remove('arakoon-ovsdb')
        for service in services:
            if ServiceManager.has_service(service, client=target_client):
                Toolbox.change_service_state(target_client, service, 'start', NodeTypeController._logger)

        Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
        NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        # Restart once more when running the 'promote' hooks returns a truthy value
        if Toolbox.run_hooks(component='nodetype',
                             sub_component='promote',
                             logger=NodeTypeController._logger,
                             cluster_ip=cluster_ip,
                             master_ip=master_ip):
            Toolbox.log(logger=NodeTypeController._logger, messages='Restarting services')
            NodeTypeController.restart_framework_and_memcache_services(clients=ip_client_map, logger=NodeTypeController._logger)

        if NodeTypeController.avahi_installed(client=target_client, logger=NodeTypeController._logger) is True:
            NodeTypeController.configure_avahi(client=target_client, node_name=node_name, node_type='master', logger=NodeTypeController._logger)
        Configuration.set('/ovs/framework/hosts/{0}/type'.format(machine_id), 'MASTER')
        target_client.run(['chown', '-R', 'ovs:ovs', '/opt/OpenvStorage/config'])
        Configuration.set('/ovs/framework/hosts/{0}/promotecompleted'.format(machine_id), True)

        # Remove the rollback marker file if it is present
        if target_client.file_exists('/tmp/ovs_rollback'):
            target_client.file_delete('/tmp/ovs_rollback')

        Toolbox.log(logger=NodeTypeController._logger, messages='Promote complete')
Пример #21
0
    def test_collapse():
        """
        Test the arakoon collapsing

        For every StorageRouter IP, each internal Arakoon cluster (Arakoon, namespace manager or ALBA manager
        service) running on that node is collapsed. Afterwards at most 2 tlogs may remain in the tlog
        directory and the timestamp of 'head.db' must differ from the one taken before the collapse.

        :return: None
        :raises AssertionError: When more than 2 tlogs remain or when 'head.db' was not touched
        """
        ArakoonCollapse.LOGGER.info("Starting validating arakoon collapse")
        node_ips = StoragerouterHelper.get_storagerouter_ips()
        node_ips.sort()
        for node_ip in node_ips:
            ArakoonCollapse.LOGGER.info(
                "Fetching arakoons on node `{0}`".format(node_ip))
            arakoon_clusters = []
            root_client = SSHClient(node_ip, username='******')

            # fetch arakoon clusters
            for service in ServiceList.get_services():
                if service.is_internal is True and service.storagerouter.ip == node_ip and \
                    service.type.name in (ServiceType.SERVICE_TYPES.ARAKOON,
                                          ServiceType.SERVICE_TYPES.NS_MGR,
                                          ServiceType.SERVICE_TYPES.ALBA_MGR):
                    arakoon_clusters.append(
                        service.name.replace('arakoon-', ''))

            # perform collapse
            ArakoonCollapse.LOGGER.info(
                "Starting arakoon collapse on node `{0}`".format(node_ip))
            for arakoon_cluster in arakoon_clusters:
                ArakoonCollapse.LOGGER.info(
                    "Fetching `{0}` arakoon on node `{1}`".format(
                        arakoon_cluster, node_ip))
                arakoon_config_path = Configuration.get_configuration_path(
                    '/ovs/arakoon/{0}/config'.format(arakoon_cluster))
                # Default location, overruled below when the cluster configuration contains a 'tlog_dir'
                tlog_location = '/opt/OpenvStorage/db/arakoon/{0}/tlogs'.format(
                    arakoon_cluster)

                # read_tlog_dir
                with remote(node_ip, [Configuration]) as rem:
                    config_contents = rem.Configuration.get(
                        '/ovs/arakoon/{0}/config'.format(arakoon_cluster),
                        raw=True)
                for line in config_contents.splitlines():
                    if 'tlog_dir' in line:
                        tlog_location = line.split()[-1]

                nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(
                    root_client, tlog_location)
                # Stays 0 when there is no head.db yet, which always differs from the timestamp after collapsing
                old_headdb_timestamp = 0
                if root_client.file_exists('/'.join([tlog_location,
                                                     'head.db'])):
                    old_headdb_timestamp = root_client.run([
                        'stat', '--format=%Y',
                        '{0}/{1}'.format(tlog_location, 'head.db')
                    ])
                # Run the arakoon benchmark first when there are at most 2 tlogs
                # (presumably to generate tlogs so there is something to collapse - confirm)
                if nr_of_tlogs <= 2:
                    benchmark_command = [
                        'arakoon', '--benchmark', '-n_clients', '1', '-max_n',
                        '5_000', '-config', arakoon_config_path
                    ]
                    root_client.run(benchmark_command)

                ArakoonCollapse.LOGGER.info(
                    "Collapsing arakoon `{0}` on node `{1}` ...".format(
                        arakoon_cluster, node_ip))
                GenericController.collapse_arakoon()

                nr_of_tlogs = ArakoonCollapse.get_nr_of_tlogs_in_folder(
                    root_client, tlog_location)
                new_headdb_timestamp = root_client.run([
                    'stat', '--format=%Y',
                    '{0}/{1}'.format(tlog_location, 'head.db')
                ])

                # perform assertion
                # The message reports the cluster ({1}) and the node ({2}) and matches the '<= 2' check
                assert nr_of_tlogs <= 2,\
                    'Arakoon collapse left {0} tlogs on the environment, expecting at most 2 in `{1}` on node `{2}`'\
                    .format(nr_of_tlogs, arakoon_cluster, node_ip)
                assert old_headdb_timestamp != new_headdb_timestamp,\
                    'Timestamp of the head_db file was not changed ' \
                    'in the process of collapsing tlogs of arakoon `{0}` on node `{1}`'\
                    .format(arakoon_cluster, node_ip)

                ArakoonCollapse.LOGGER.info(
                    "Successfully collapsed arakoon `{0}` on node `{1}`".
                    format(arakoon_cluster, node_ip))

        ArakoonCollapse.LOGGER.info("Finished validating arakoon collapsing")