def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports, base_dir):
        """
        Extends a cluster to a given new node
        """
        logger.debug("Extending cluster {0} from {1} to {2}".format(cluster_name, master_ip, new_ip))
        client = SSHClient(master_ip)
        config = ArakoonClusterConfig(cluster_name)
        config.load_config(client)

        client = SSHClient(new_ip)
        base_dir = base_dir.rstrip("/")
        port_range = client.config_read("ovs.ports.arakoon")
        ports = System.get_free_ports(port_range, exclude_ports, 2, client)
        node_name = System.get_my_machine_id(client)

        if not [node.name for node in config.nodes if node.name == node_name]:
            config.nodes.append(
                ArakoonNodeConfig(
                    name=node_name,
                    ip=new_ip,
                    client_port=ports[0],
                    messaging_port=ports[1],
                    log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                    home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                    tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name),
                )
            )
        ArakoonInstaller._deploy(config)
        logger.debug("Extending cluster {0} from {1} to {2} completed".format(cluster_name, master_ip, new_ip))
        return {"client_port": ports[0], "messaging_port": ports[1]}
    def create_cluster(cluster_name, ip, exclude_ports, base_dir, plugins=None):
        """
        Creates a cluster
        """
        logger.debug("Creating cluster {0} on {1}".format(cluster_name, ip))
        client = SSHClient(ip)
        base_dir = base_dir.rstrip("/")
        port_range = client.config_read("ovs.ports.arakoon")
        ports = System.get_free_ports(port_range, exclude_ports, 2, client)
        node_name = System.get_my_machine_id(client)

        config = ArakoonClusterConfig(cluster_name, plugins)
        if not [node.name for node in config.nodes if node.name == node_name]:
            config.nodes.append(
                ArakoonNodeConfig(
                    name=node_name,
                    ip=ip,
                    client_port=ports[0],
                    messaging_port=ports[1],
                    log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name),
                    home=ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name),
                    tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name),
                )
            )
        ArakoonInstaller._deploy(config)
        logger.debug("Creating cluster {0} on {1} completed".format(cluster_name, ip))
        return {"client_port": ports[0], "messaging_port": ports[1]}
    def extend_cluster(master_ip, new_ip, cluster_name, exclude_ports):
        """
        Extends a cluster to a given new node
        """
        logger.debug('Extending cluster {0} from {1} to {2}'.format(
            cluster_name, master_ip, new_ip))
        client = SSHClient(master_ip)
        config = ArakoonClusterConfig(cluster_name)
        config.load_config(client)

        client = SSHClient(new_ip)
        base_dir = client.config_read('ovs.arakoon.location').rstrip('/')
        port_range = client.config_read('ovs.ports.arakoon')
        ports = System.get_free_ports(port_range, exclude_ports, 2, client)
        node_name = System.get_my_machine_id(client)

        if not [node.name for node in config.nodes if node.name == node_name]:
            config.nodes.append(
                ArakoonNodeConfig(
                    name=node_name,
                    ip=new_ip,
                    client_port=ports[0],
                    messaging_port=ports[1],
                    log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(
                        cluster_name),
                    home=ArakoonInstaller.ARAKOON_HOME_DIR.format(
                        base_dir, cluster_name),
                    tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(
                        base_dir, cluster_name)))
        ArakoonInstaller._deploy(config)
        logger.debug('Extending cluster {0} from {1} to {2} completed'.format(
            cluster_name, master_ip, new_ip))
        return {'client_port': ports[0], 'messaging_port': ports[1]}
    def status(cluster_name, ip):
        client = SSHClient.load(ip)
        cmd = """
from ovs.plugin.provider.service import Service
print Service.get_service_status('arakoon-{0}')
""".format(cluster_name)
        System.exec_remote_python(client, cmd)
    def create_cluster(cluster_name, ip, exclude_ports, plugins=None):
        """
        Creates a cluster
        """
        logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
        client = SSHClient(ip)
        base_dir = client.config_read('ovs.arakoon.location').rstrip('/')
        port_range = client.config_read('ovs.ports.arakoon')
        ports = System.get_free_ports(port_range, exclude_ports, 2, client)
        node_name = System.get_my_machine_id(client)

        config = ArakoonClusterConfig(cluster_name, plugins)
        if not [node.name for node in config.nodes if node.name == node_name]:
            config.nodes.append(
                ArakoonNodeConfig(
                    name=node_name,
                    ip=ip,
                    client_port=ports[0],
                    messaging_port=ports[1],
                    log_dir=ArakoonInstaller.ARAKOON_LOG_DIR.format(
                        cluster_name),
                    home=ArakoonInstaller.ARAKOON_HOME_DIR.format(
                        base_dir, cluster_name),
                    tlog_dir=ArakoonInstaller.ARAKOON_TLOG_DIR.format(
                        base_dir, cluster_name)))
        ArakoonInstaller._deploy(config)
        logger.debug('Creating cluster {0} on {1} completed'.format(
            cluster_name, ip))
        return {'client_port': ports[0], 'messaging_port': ports[1]}
 def _get_free_ports(client):
     node_name = System.get_my_machine_id(client)
     clusters = []
     exclude_ports = []
     if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
         for cluster_name in EtcdConfiguration.list(
                 ArakoonInstaller.ETCD_CONFIG_ROOT):
             try:
                 config = ArakoonClusterConfig(cluster_name)
                 config.load_config()
                 for node in config.nodes:
                     if node.name == node_name:
                         clusters.append(cluster_name)
                         exclude_ports.append(node.client_port)
                         exclude_ports.append(node.messaging_port)
             except:
                 logger.error(
                     '  Could not load port information of cluster {0}'.
                     format(cluster_name))
     ports = System.get_free_ports(
         EtcdConfiguration.get(
             '/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)),
         exclude_ports, 2, client)
     logger.debug(
         '  Loaded free ports {0} based on existing clusters {1}'.format(
             ports, clusters))
     return ports
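The lookup above delegates the actual scanning to System.get_free_ports; a standalone sketch of the semantics it is relied on for here (not the real implementation, and the port range value is hypothetical):

import socket

def sketch_get_free_ports(port_range, exclude, amount):
    # Walk the [start, end] range, skip excluded ports, keep only ports that can
    # actually be bound locally, and stop once 'amount' ports have been collected.
    start, end = port_range
    found = []
    for port in xrange(start, end + 1):
        if port in exclude:
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('127.0.0.1', port))
        except socket.error:
            continue
        finally:
            sock.close()
        found.append(port)
        if len(found) == amount:
            return found
    raise RuntimeError('Not enough free ports in range {0}'.format(port_range))

print(sketch_get_free_ports([26400, 26499], exclude=[26400, 26401], amount=2))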
    def catchup_cluster_node(cluster_name, ip):
        client = SSHClient.load(ip)
        cmd = """
from ovs.extensions.db.arakoon.ArakoonManagement import ArakoonManagementEx
cluster = ArakoonManagementEx().getCluster('{0}')
cluster.catchup_node()
""".format(cluster_name)
        System.exec_remote_python(client, cmd)
    def delete_dir_structure(self, client=None, cluster_name=None):
        if cluster_name is None:
            cluster_name = self.config.cluster_name
        cmd = """
rm -rf {0}/arakoon/{1}
rm -rf {0}/tlogs/{1}
rm -rf /var/log/arakoon/{1}
""".format(self.config.base_dir, cluster_name)
        System.run(cmd, client)
    def create_dir_structure(self, client=None, cluster_name=None):
        if cluster_name is None:
            cluster_name = self.config.cluster_name
        cmd = """
mkdir -p {0}/arakoon/{1}
mkdir -p {0}/tlogs/{1}
mkdir -p /var/log/arakoon/{1}
""".format(self.config.base_dir, cluster_name)
        System.run(cmd, client)
class Helper(object):
    """
    Helper module
    """
    MODULE = "utils"
    SETTINGS_LOC = "/opt/OpenvStorage/config/healthcheck/settings.json"
    RAW_INIT_MANAGER = str(
        subprocess.check_output('cat /proc/1/comm', shell=True)).strip()
    LOCAL_SR = System.get_my_storagerouter()
    LOCAL_ID = System.get_my_machine_id()

    with open(SETTINGS_LOC) as settings_file:
        settings = json.load(settings_file)

    debug_mode = settings["healthcheck"]["debug_mode"]
    enable_logging = settings["healthcheck"]["logging"]["enable"]
    max_log_size = settings["healthcheck"]["max_check_log_size"]
    packages = settings["healthcheck"]["package_list"]
    extra_ports = settings["healthcheck"]["extra_ports"]
    rights_dirs = settings["healthcheck"]["rights_dirs"]
    owners_files = settings["healthcheck"]["owners_files"]
    max_hours_zero_disk_safety = settings["healthcheck"][
        "max_hours_zero_disk_safety"]

    @staticmethod
    def get_healthcheck_version():
        """
        Gets the installed healthcheck version
        :return: version number of the installed healthcheck
        :rtype: str
        """
        client = SSHClient(System.get_my_storagerouter())
        package_name = 'openvstorage-health-check'
        package_manager = PackageFactory.get_manager()
        packages = package_manager.get_installed_versions(
            client=client, package_names=[package_name])
        return packages.get(package_name, 'unknown')

    @staticmethod
    def get_local_settings():
        """
        Fetch settings of the local Open vStorage node
        :return: local settings of the node
        :rtype: dict
        """
        # Fetch all details
        local_settings = {
            'cluster_id': Configuration.get("/ovs/framework/cluster_id"),
            'hostname': socket.gethostname(),
            'storagerouter_id': Helper.LOCAL_ID,
            'storagerouter_type': Helper.LOCAL_SR.node_type,
            'environment os': ' '.join(platform.linux_distribution())
        }
        return local_settings
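The keys read above imply a settings.json of roughly the following shape; a hypothetical example rendered as Python (every value is illustrative, not an actual default):

import json

hypothetical_settings = {
    'healthcheck': {
        'debug_mode': False,
        'logging': {'enable': True},
        'max_check_log_size': 4,
        'package_list': ['openvstorage', 'alba'],
        'extra_ports': {'nginx': [80, 443]},
        'rights_dirs': {'/var/log/ovs': '755'},
        'owners_files': {'/etc/rabbitmq/rabbitmq.config': 'root:root'},
        'max_hours_zero_disk_safety': 2
    }
}
print(json.dumps(hypothetical_settings, indent=4))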
 def create_cluster(cluster_name, ip, exclude_ports, plugins=None):
     ai = ArakoonInstaller()
     ai.clear_config()
     client = SSHClient.load(ip)
     port_range = System.read_remote_config(client, 'ovs.ports.arakoon')
     free_ports = System.get_free_ports(port_range, exclude_ports, 2, client)
     ai.create_config(cluster_name, ip, free_ports[0], free_ports[1], plugins)
     ai.generate_configs(client)
     ai.create_dir_structure(client)
     return {'client_port': free_ports[0],
             'messaging_port': free_ports[1]}
 def extend_cluster(src_ip, tgt_ip, cluster_name, exclude_ports):
     ai = ArakoonInstaller()
     ai.load_config_from(cluster_name, src_ip)
     client = SSHClient.load(tgt_ip)
     tgt_id = System.get_my_machine_id(client)
     port_range = System.read_remote_config(client, 'ovs.ports.arakoon')
     free_ports = System.get_free_ports(port_range, exclude_ports, 2, client)
     ai.create_dir_structure(client)
     ai.add_node_to_config(tgt_id, tgt_ip, free_ports[0], free_ports[1])
     ai.upload_config_for(cluster_name)
     return {'client_port': free_ports[0],
             'messaging_port': free_ports[1]}
Example #13
 def update_storagedrivers(storagedriver_guids, storagerouters, parameters):
     """
     Add/remove multiple vPools
     @param storagedriver_guids: Storage Drivers to be removed
     @param storagerouters: StorageRouters on which to add a new link
     @param parameters: Settings for new links
     """
     success = True
     # Add Storage Drivers
     for storagerouter_ip, storageappliance_machineid in storagerouters:
         try:
             new_parameters = copy.copy(parameters)
             new_parameters['storagerouter_ip'] = storagerouter_ip
             local_machineid = System.get_my_machine_id()
             if local_machineid == storageappliance_machineid:
                 # Inline execution, since it's on the same node (preventing deadlocks)
                 StorageRouterController.add_vpool(new_parameters)
             else:
                 # Async execution, since it has to be executed on another node
                 # @TODO: Will break in Celery 3.2, need to find another solution
                 # Requirements:
                 # - This code cannot continue until this new task is completed (as all these Storage Routers
                 #   need to be handled sequentially)
                 # - The wait() or get() method are not allowed anymore from within a task to prevent deadlocks
                 result = StorageRouterController.add_vpool.s(new_parameters).apply_async(
                     routing_key='sr.{0}'.format(storageappliance_machineid)
                 )
                 result.wait()
         except:
             success = False
     # Remove Storage Drivers
     for storagedriver_guid in storagedriver_guids:
         try:
             storagedriver = StorageDriver(storagedriver_guid)
             storagerouter_machineid = storagedriver.storagerouter.machine_id
             local_machineid = System.get_my_machine_id()
             if local_machineid == storagerouter_machineid:
                 # Inline execution, since it's on the same node (preventing deadlocks)
                 StorageRouterController.remove_storagedriver(storagedriver_guid)
             else:
                 # Async execution, since it has to be executed on another node
                 # @TODO: Will break in Celery 3.2, need to find another solution
                 # Requirements:
                 # - This code cannot continue until this new task is completed (as all these VSAs need to be
                 #   handled sequentially)
                 # - The wait() or get() method are not allowed anymore from within a task to prevent deadlocks
                 result = StorageRouterController.remove_storagedriver.s(storagedriver_guid).apply_async(
                     routing_key='sr.{0}'.format(storagerouter_machineid)
                 )
                 result.wait()
         except:
             success = False
     return success
Example #14
    def create(self):
        """
        Prepares a new Storagedriver for a given vPool and Storagerouter
        :return: None
        :rtype: NoneType
        """
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')

        machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
        port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
        storagerouter = self.sr_installer.storagerouter
        with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
            model_ports_in_use = []
            for sd in StorageDriverList.get_storagedrivers():
                if sd.storagerouter_guid == storagerouter.guid:
                    model_ports_in_use += sd.ports.values()
                    for proxy in sd.alba_proxies:
                        model_ports_in_use.append(proxy.service.ports[0])
            ports = System.get_free_ports(selected_range=port_range, exclude=model_ports_in_use, amount=4 + self.sr_installer.requested_proxies, client=self.sr_installer.root_client)

            vpool = self.vp_installer.vpool
            vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
            storagedriver = StorageDriver()
            storagedriver.name = vrouter_id.replace('_', ' ')
            storagedriver.ports = {'management': ports[0],
                                   'xmlrpc': ports[1],
                                   'dtl': ports[2],
                                   'edge': ports[3]}
            storagedriver.vpool = vpool
            storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
            storagedriver.storage_ip = self.storage_ip
            storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
            storagedriver.description = storagedriver.name
            storagedriver.storagerouter = storagerouter
            storagedriver.storagedriver_id = vrouter_id
            storagedriver.save()

            # ALBA Proxies
            proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
            for proxy_id in xrange(self.sr_installer.requested_proxies):
                service = Service()
                service.storagerouter = storagerouter
                service.ports = [ports[4 + proxy_id]]
                service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
                service.type = proxy_service_type
                service.save()
                alba_proxy = AlbaProxy()
                alba_proxy.service = service
                alba_proxy.storagedriver = storagedriver
                alba_proxy.save()
        self.storagedriver = storagedriver
Example #15
 def get_local_storagerouter():
     """
     Fetches the details of a local storagerouter
     :return: a StorageRouter
     :rtype: ovs.dal.hybrids.storagerouter.StorageRouter
     """
     return System.get_my_storagerouter()
Example #16
    def process_IN_MOVED_TO(self, event):

        try:
            self._logger.debug('path: {0} - name: {1} - moved to'.format(
                event.path, event.name))

            if self._is_run_watcher(event.path):
                self.invalidate_vmachine_status(event.name)
                return

            vpool_path = '/mnt/' + self.get_vpool_for_vm(event.pathname)
            if vpool_path == '/mnt/':
                self._logger.warning(
                    'Vmachine not on vpool or invalid xml format for {0}'.
                    format(event.pathname))

            if os.path.exists(vpool_path):
                machine_id = System.get_my_machine_id()
                target_path = vpool_path + '/' + machine_id + '/'
                target_xml = target_path + event.name
                if not os.path.exists(target_path):
                    os.mkdir(target_path)
                shutil.copy2(event.pathname, target_xml)
        except Exception as exception:
            self._logger.error(
                'Exception during process_IN_MOVED_TO: {0}'.format(
                    str(exception)),
                print_msg=True)
Example #17
    def tick(self):
        """
        Runs one iteration of the scheduler. This is guarded with a distributed lock
        """
        self._logger.debug('DS executing tick')
        try:
            self._has_lock = False
            with self._mutex:
                # noinspection PyProtectedMember
                node_now = current_app._get_current_object().now()
                node_timestamp = time.mktime(node_now.timetuple())
                node_name = System.get_my_machine_id()
                try:
                    lock = self._persistent.get(self._lock_name)
                except KeyNotFoundException:
                    lock = None
                if lock is None:
                    # There is no lock yet, so the lock is acquired
                    self._has_lock = True
                    self._logger.debug('DS there was no lock in tick')
                else:
                    if lock['name'] == node_name:
                        # The current node holds the lock
                        self._logger.debug('DS keeps own lock')
                        self._has_lock = True
                    elif node_timestamp - lock[
                            'timestamp'] > DistributedScheduler.TIMEOUT:
                        # The current lock is timed out, so the lock is stolen
                        self._logger.debug(
                            'DS last lock refresh is {0}s old'.format(
                                node_timestamp - lock['timestamp']))
                        self._logger.debug('DS stealing lock from {0}'.format(
                            lock['name']))
                        self._load_schedule()
                        self._has_lock = True
                    else:
                        self._logger.debug('DS lock is not ours')
                if self._has_lock is True:
                    lock = {'name': node_name, 'timestamp': node_timestamp}
                    self._logger.debug('DS refreshing lock')
                    self._persistent.set(self._lock_name, lock)

            if self._has_lock is True:
                self._logger.debug('DS executing tick workload')
                remaining_times = []
                try:
                    for entry in self.schedule.itervalues():
                        next_time_to_run = self.maybe_due(
                            entry, self.publisher)
                        if next_time_to_run:
                            remaining_times.append(next_time_to_run)
                except RuntimeError:
                    pass
                self._logger.debug('DS executing tick workload - done')
                return min(remaining_times + [self.max_interval])
            else:
                return self.max_interval
        except Exception as ex:
            self._logger.debug('DS got error during tick: {0}'.format(ex))
            return self.max_interval
Example #18
    def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
        """
        Removes a node from a cluster; the old node will become a slave
        :param cluster_name: The name of the cluster to shrink
        :param deleted_node_ip: The ip of the node that should be deleted
        :param remaining_node_ip: The ip of a remaining node
        """
        logger.debug('Shrinking cluster "{0}" from {1}'.format(
            cluster_name, deleted_node_ip))

        current_client = SSHClient(remaining_node_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, current_client):
            raise RuntimeError(
                'Cluster "{0}" unhealthy, aborting shrink'.format(
                    cluster_name))

        old_client = SSHClient(deleted_node_ip, username='******')
        node_name = System.get_my_machine_id(old_client)
        node_id = None
        for item in current_client.run('etcdctl member list').splitlines():
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            if info['name'] == node_name:
                node_id = info['id']
        if node_id is None:
            raise RuntimeError(
                'Could not locate {0} in the cluster'.format(deleted_node_ip))
        current_client.run('etcdctl member remove {0}'.format(node_id))
        EtcdInstaller.deploy_to_slave(remaining_node_ip, deleted_node_ip,
                                      cluster_name)
        EtcdInstaller.wait_for_cluster(cluster_name, current_client)

        logger.debug('Shrinking cluster "{0}" from {1} completed'.format(
            cluster_name, deleted_node_ip))
Example #19
    def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
        """
        Removes a node from a cluster; the old node will become a slave
        :param cluster_name: The name of the cluster to shrink
        :param deleted_node_ip: The ip of the node that should be deleted
        :param remaining_node_ip: The ip of a remaining node
        """
        logger.debug('Shrinking cluster "{0}" from {1}'.format(cluster_name, deleted_node_ip))

        current_client = SSHClient(remaining_node_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, current_client):
            raise RuntimeError('Cluster "{0}" unhealthy, aborting shrink'.format(cluster_name))

        old_client = SSHClient(deleted_node_ip, username='******')
        node_name = System.get_my_machine_id(old_client)
        node_id = None
        for item in current_client.run('etcdctl member list').splitlines():
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            if info['name'] == node_name:
                node_id = info['id']
        if node_id is None:
            raise RuntimeError('Could not locate {0} in the cluster'.format(deleted_node_ip))
        current_client.run('etcdctl member remove {0}'.format(node_id))
        EtcdInstaller.deploy_to_slave(remaining_node_ip, deleted_node_ip, cluster_name)
        EtcdInstaller.wait_for_cluster(cluster_name, current_client)

        logger.debug('Shrinking cluster "{0}" from {1} completed'.format(cluster_name, deleted_node_ip))
Example #20
    def migrate(master_ips=None, extra_ips=None):
        """
        Executes all migrations. It keeps track of an internal "migration version" which is always increasing by one
        :param master_ips: IP addresses of the MASTER nodes
        :param extra_ips: IP addresses of the EXTRA nodes
        """
        machine_id = System.get_my_machine_id()
        key = '/ovs/framework/hosts/{0}/versions'.format(machine_id)
        data = Configuration.get(key) if Configuration.exists(key) else {}
        migrators = []
        path = '/'.join([os.path.dirname(__file__), 'migration'])
        for filename in os.listdir(path):
            if os.path.isfile('/'.join([path, filename])) and filename.endswith('.py'):
                name = filename.replace('.py', '')
                module = imp.load_source(name, '/'.join([path, filename]))
                for member in inspect.getmembers(module):
                    if inspect.isclass(member[1]) and member[1].__module__ == name and 'object' in [base.__name__ for base in member[1].__bases__]:
                        migrators.append((member[1].identifier, member[1].migrate))

        end_version = 0
        for identifier, method in migrators:
            base_version = data[identifier] if identifier in data else 0
            version = method(base_version, master_ips, extra_ips)
            if version > end_version:
                end_version = version
            data[identifier] = end_version

        Configuration.set(key, data)
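The discovery loop above picks up any class in the migration directory that derives directly from object and exposes an identifier plus a migrate callable; a hypothetical migrator sketch showing that contract (the name and version number are made up):

class DemoMigrator(object):
    """ Illustrative migrator; only the identifier/migrate contract is taken from the code above """
    identifier = 'demo'
    THIS_VERSION = 3

    @staticmethod
    def migrate(previous_version, master_ips=None, extra_ips=None):
        if previous_version < DemoMigrator.THIS_VERSION:
            pass  # perform the actual migration steps here
        return DemoMigrator.THIS_VERSION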
Example #21
 def wrapped(*args, **kwargs):
     if lock_type == 'local':
         _mutex = file_mutex(key)
     elif lock_type == 'cluster':
         _mutex = volatile_mutex(key)
     else:
         raise ValueError(
             'Lock type {0} is not supported!'.format(lock_type))
     try:
         _mutex.acquire(wait=0.005)
         local_sr = System.get_my_storagerouter()
         CacheHelper.set(key=key,
                         item={
                             'ip': local_sr.ip,
                             'hostname': local_sr.name
                         },
                         expire_time=60)
         return func(*args, **kwargs)
     except (NoFileLockAvailableException,
             NoVolatileLockAvailableException):
         if callback is None:
             return
         else:
             executor_info = None
             start = time.time()
             while executor_info is None:
                 # Calculated guesswork: if a callback is expected, the acquire has happened for another executor, so the volatile key should be set eventually.
                 # However, because the key is set after the acquire, the callback executor and the original method executor can race between fetch and set.
                 # A better implementation would rely on the framework's ensure_single decorator, as it checks for these races itself.
                 # This is just a poor man's, temporary implementation.
                 if time.time() - start > 5:
                     raise ValueError(
                         'Timed out after 5 seconds while fetching the information about the executor.'
                     )
                 try:
                     executor_info = CacheHelper.get(key=key)
                 except:
                     pass
             callback_func = callback.__func__ if isinstance(
                 callback, staticmethod) else callback
             argnames = inspect.getargspec(callback_func)[0]
             arguments = list(args)
             kwargs.update({'test_name': func.__name__})
             if executor_info is not None:
                 kwargs.update(executor_info)
                 if 'result_handler' in argnames:
                     result_handler = kwargs.get('result_handler')
                     for index, arg in enumerate(arguments):
                         if isinstance(arg,
                                       HCResults.HCResultCollector):
                             result_handler = arguments.pop(index)
                             break
                     if result_handler is None:
                         raise TypeError(
                             'Expected an instance of {0}'.format(HCResults.HCResultCollector))
                     kwargs['result_handler'] = result_handler
             return callback_func(*tuple(arguments), **kwargs)
     finally:
         _mutex.release()
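For context, wrapped() above closes over key, lock_type, callback and func, so it is presumably produced by a decorator factory along these lines; the factory name and its defaults are assumptions inferred from those closed-over names, not the real API:

def node_locked_check(key, lock_type='local', callback=None):
    # Hypothetical factory: only the executor that acquires the mutex runs the check,
    # the others either return or hand off to the callback, as in wrapped() above.
    def decorator(func):
        def wrapped(*args, **kwargs):
            return func(*args, **kwargs)  # real body shown above
        return wrapped
    return decorator

# Usage sketch (hypothetical names):
# @node_locked_check(key='ovs-healthcheck-dtl', lock_type='cluster', callback=some_fallback)
# def check_dtl(result_handler):
#     ...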
Example #22
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str

        :param new_ip: IP address of the node to be added
        :type new_ip: str

        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str

        :param base_dir: Base directory that will hold the db and tlogs
        :type base_dir: str

        :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')

        config = ArakoonClusterConfig(cluster_name)
        config.load_config()

        client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(new_ip, {log_dir: True,
                                                              home_dir: False,
                                                              tlog_dir: False})

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                      ip=new_ip,
                                                      client_port=ports[0],
                                                      messaging_port=ports[1],
                                                      log_dir=log_dir,
                                                      home=home_dir,
                                                      tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
        return {'client_port': ports[0],
                'messaging_port': ports[1]}
Example #23
    def pulse():
        """
        Update the heartbeats for the Current Routers
        :return: None
        """
        logger = Logger('extensions-generic')
        machine_id = System.get_my_machine_id()
        current_time = int(time.time())

        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.machine_id == machine_id:
                with volatile_mutex('storagerouter_heartbeat_{0}'.format(
                        node.guid)):
                    node_save = StorageRouter(node.guid)
                    node_save.heartbeats['process'] = current_time
                    node_save.save()
                StorageRouterController.ping.s(
                    node.guid, current_time).apply_async(
                        routing_key='sr.{0}'.format(machine_id))
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats[
                                'process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d '{0}'".format(
                                node.name.replace(r"'", r"'\''")),
                                         shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
Example #24
    def __init__(self):
        """
        Initializes the client
        """
        # Safe calls
        self._node_id = System.get_my_machine_id().replace(r"'", r"'\''")
        # Alba is currently always installed but the Alba version/package info is located in the SDM section
        self._package_manager = PackageFactory.get_manager()
        self._service_manager = ServiceFactory.get_manager()

        self._service_type = ServiceFactory.get_service_type()
        if self._service_type != 'systemd':
            raise NotImplementedError('Only Systemd is supported')

        # Potential failing calls
        self._cluster_id = self.get_config_key(
            self.LOCATION_CLUSTER_ID,
            fallback=[CONFIG_STORE_LOCATION, 'cluster_id'])
        self.interval = self.get_config_key(
            self.LOCATION_INTERVAL,
            fallback=[self.FALLBACK_CONFIG, self.KEY_INTERVAL],
            default=self.DEFAULT_INTERVAL)
        self._openvpn_service_name = 'openvpn@ovs_{0}-{1}'.format(
            self._cluster_id, self._node_id)

        # Calls to look out for. These could still be None when using them
        self._storagerouter = None
        self._client = None
        self._set_storagerouter()
        self._set_client()

        # Safe call, start caching
        self.caching = SupportAgentCache(self)
 def check_dtl(result_handler):
     """
     Checks the dtl for all vdisks on the local node
     :param result_handler: logging object
     :type result_handler: ovs.extensions.healthcheck.result.HCResults
     :return: None
     :rtype: NoneType
     """
     # Fetch vdisks hosted on this machine
     local_sr = System.get_my_storagerouter()
     if len(local_sr.vdisks_guids) == 0:
         return result_handler.skip('No VDisks present in cluster.')
     for vdisk_guid in local_sr.vdisks_guids:
         vdisk = VDisk(vdisk_guid)
         vdisk.invalidate_dynamics(['dtl_status', 'info'])
         if vdisk.dtl_status == 'ok_standalone' or vdisk.dtl_status == 'disabled':
             result_handler.success('VDisk {0}s DTL is disabled'.format(vdisk.name), code=ErrorCodes.volume_dtl_standalone)
         elif vdisk.dtl_status == 'ok_sync':
             result_handler.success('VDisk {0}s DTL is enabled and running.'.format(vdisk.name), code=ErrorCodes.volume_dtl_ok)
         elif vdisk.dtl_status == 'degraded':
             result_handler.warning('VDisk {0}s DTL is degraded.'.format(vdisk.name), code=ErrorCodes.volume_dtl_degraded)
         elif vdisk.dtl_status == 'checkup_required':
             result_handler.warning('VDisk {0}s DTL should be configured.'.format(vdisk.name), code=ErrorCodes.volume_dtl_checkup_required)
         elif vdisk.dtl_status == 'catch_up':
             result_handler.warning('VDisk {0}s DTL is enabled but still syncing.'.format(vdisk.name), code=ErrorCodes.volume_dtl_catch_up)
         else:
             result_handler.warning('VDisk {0}s DTL has an unknown status: {1}.'.format(vdisk.name, vdisk.dtl_status), code=ErrorCodes.volume_dtl_unknown)
Example #26
    def pulse():
        """
        Update the heartbeats for the Current Routers
        :return: None
        """
        logger = LogHandler.get('extensions', name='heartbeat')
        machine_id = System.get_my_machine_id()
        current_time = int(time.time())

        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.machine_id == machine_id:
                with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                    node_save = StorageRouter(node.guid)
                    node_save.heartbeats['process'] = current_time
                    node_save.save()
                StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
Example #27
    def add_services(client, node_type, logger):
        """
        Add the services required by the OVS cluster
        :param client: Client on which to add the services
        :type client: ovs.extensions.generic.sshclient.SSHClient
        :param node_type: Type of node ('master' or 'extra')
        :type node_type: str
        :param logger: Logger object used for logging
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        """
        Toolbox.log(logger=logger, messages='Adding services')
        services = {}
        worker_queue = System.get_my_machine_id(client=client)
        if node_type == 'master':
            worker_queue += ',ovs_masters'
            services.update({'memcached': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                             'rabbitmq-server': {'MEMCACHE_NODE_IP': client.ip, 'WORKER_QUEUE': worker_queue},
                             'scheduled-tasks': {},
                             'webapp-api': {},
                             'volumerouter-consumer': {}})
        services.update({'workers': {'WORKER_QUEUE': worker_queue},
                         'watcher-framework': {}})

        for service_name, params in services.iteritems():
            if not ServiceManager.has_service(service_name, client):
                Toolbox.log(logger=logger, messages='Adding service {0}'.format(service_name))
                ServiceManager.add_service(name=service_name, params=params, client=client)
Example #28
    def pulse():
        """
        Update the heartbeats for all Storage Routers
        :return: None
        """
        logger = LogHandler.get('extensions', name='heartbeat')

        current_time = int(time.time())
        machine_id = System.get_my_machine_id()
        amqp = '{0}://{1}:{2}@{3}//'.format(EtcdConfiguration.get('/ovs/framework/messagequeue|protocol'),
                                            EtcdConfiguration.get('/ovs/framework/messagequeue|user'),
                                            EtcdConfiguration.get('/ovs/framework/messagequeue|password'),
                                            EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id)))

        celery_path = OSManager.get_path('celery')
        worker_states = check_output("{0} inspect ping -b {1} --timeout=5 2> /dev/null | grep OK | perl -pe 's/\x1b\[[0-9;]*m//g' || true".format(celery_path, amqp), shell=True)
        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.heartbeats is None:
                node.heartbeats = {}
            if 'celery@{0}: OK'.format(node.name) in worker_states:
                node.heartbeats['celery'] = current_time
            if node.machine_id == machine_id:
                node.heartbeats['process'] = current_time
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d {0}".format(node.name), shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
            node.save()
    def _create_vpool(self):
        """
        Needed to actually run tests on
        This is not actually a test of "Add Vpool to OVS",
        so any failure here will be reported as a setUp error and no tests will run
        """
        pmachine = System.get_my_storagerouter().pmachine
        mgmt_center = MgmtCenter(
            data={
                'name': 'Openstack',
                'description': 'test',
                'username': OVSPluginTestCase.CINDER_USER,
                'password': OVSPluginTestCase.CINDER_PASS,
                'ip': OVSPluginTestCase.CINDER_CONTROLLER,
                'port': 80,
                'type': 'OPENSTACK',
                'metadata': {
                    'integratemgmt': True
                }
            })
        mgmt_center.save()
        pmachine.mgmtcenter = mgmt_center
        pmachine.save()
        self._debug('Creating vpool')

        parameters = {
            'storagerouter_ip': OVSPluginTestCase.ip,
            'vpool_name': OVSPluginTestCase.VPOOL_NAME,
            'type': 'local',
            'storage_ip': '127.0.0.1',  # KVM
            'vrouter_port': OVSPluginTestCase.VPOOL_PORT,
            'integrate_vpool': True,
            'connection_host': OVSPluginTestCase.ip,
            'connection_port': OVSPluginTestCase.VPOOL_PORT,
            'connection_username': '',
            'connection_password': '',
            'connection_backend': {},
            'readcache_size': 50,
            'writecache_size': 50
        }
        StorageRouterController.add_vpool(parameters)
        attempt = 0
        while attempt < 10:
            vpool = VPoolList.get_vpool_by_name(OVSPluginTestCase.VPOOL_NAME)
            if vpool is not None:
                self._debug('vpool {0} created'.format(
                    OVSPluginTestCase.VPOOL_NAME))
                try:
                    os.listdir(OVSPluginTestCase.VPOOL_MOUNTPOINT)
                    return vpool
                except Exception as ex:
                    # either it doesn't exist, or we don't have permission
                    self._debug('vpool not ready yet {0}'.format(str(ex)))
                    pass
            attempt += 1
            time.sleep(2)
        raise RuntimeError(
            'Vpool {0} was not modeled correctly or did not start.'.format(
                OVSPluginTestCase.VPOOL_NAME))
Example #30
def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    rmq_config = RawConfigParser()
    rmq_config.read(os.path.join(Configuration.get('ovs.core.cfgdir'), 'rabbitmqclient.cfg'))
    machine_id = System.get_my_machine_id()
    return rmq_config.has_section(machine_id)
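A minimal standalone sketch of the section check this function performs; the file name, machine id and option used here are made up for illustration:

from ConfigParser import RawConfigParser  # Python 2 module name, matching the code above

rmq_config = RawConfigParser()
rmq_config.add_section('0123456789abcdef')              # hypothetical machine id
rmq_config.set('0123456789abcdef', 'location', '10.100.1.1:5672')
print(rmq_config.has_section('0123456789abcdef'))       # True -> this node runs the event consumer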
 def shrink_cluster(remaining_node_ip, deleted_node_ip, cluster_name):
     ai = ArakoonInstaller()
     ai.load_config_from(cluster_name, remaining_node_ip)
     client = SSHClient.load(deleted_node_ip)
     deleted_node_id = System.get_my_machine_id(client)
     ai.delete_dir_structure(client)
     ai.remove_node_from_config(deleted_node_id)
     ai.upload_config_for(cluster_name)
 def invalidate_vmachine_status(self, name):
     if not name.endswith('.xml'):
         return
     devicename = '{0}/{1}'.format(System.get_my_machine_id(), name)
     vm = VMachineList().get_by_devicename_and_vpool(devicename, None)
     if vm:
         vm.invalidate_dynamics()
         logger.debug('Hypervisor status invalidated for: {0}'.format(name))
Example #33
    def _get_free_ports(client):
        node_name = System.get_my_machine_id(client)
        clusters = []
        exclude_ports = []
        if Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_name, False)
                config.load_config()
                for node in config.nodes:
                    if node.name == node_name:
                        clusters.append(cluster_name)
                        exclude_ports.append(node.client_port)
                        exclude_ports.append(node.messaging_port)

        ports = System.get_free_ports(Configuration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)), exclude_ports, 2, client)
        ArakoonInstaller._logger.debug('  Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
        return ports
Example #34
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str

        :param new_ip: IP address of the node to be added
        :type new_ip: str

        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str

        :param base_dir: Base directory that will hold the db and tlogs
        :type base_dir: str

        :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')

        config = ArakoonClusterConfig(cluster_name)
        config.load_config()

        client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [home_dir, tlog_dir])

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                      ip=new_ip,
                                                      client_port=ports[0],
                                                      messaging_port=ports[1],
                                                      log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                      crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                      home=home_dir,
                                                      tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
        return {'client_port': ports[0],
                'messaging_port': ports[1]}
Example #35
    def extend_cluster(master_ip, new_ip, cluster_name):
        """
        Extends a cluster to a given new node
        :param cluster_name: Name of the cluster to be extended
        :param new_ip: IP address of the node to be added
        :param master_ip: IP of one of the already existing nodes
        """
        logger.debug('Extending cluster "{0}" from {1} to {2}'.format(cluster_name, master_ip, new_ip))

        client = SSHClient(master_ip, username='******')
        if not EtcdInstaller._is_healty(cluster_name, client):
            raise RuntimeError('Cluster "{0}" unhealthy, aborting extend'.format(cluster_name))

        cluster_members = client.run('etcdctl member list').splitlines()
        for cluster_member in cluster_members:
            if EtcdInstaller.SERVER_URL.format(new_ip) in cluster_member:
                logger.info('Node {0} already member of etcd cluster'.format(new_ip))
                return

        current_cluster = []
        for item in client.run('etcdctl member list').splitlines():
            info = re.search(EtcdInstaller.MEMBER_REGEX, item).groupdict()
            current_cluster.append('{0}={1}'.format(info['name'], info['peer']))

        client = SSHClient(new_ip, username='******')
        node_name = System.get_my_machine_id(client)
        current_cluster.append('{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR, cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        EtcdInstaller.stop(cluster_name, client)  # Stop a possible proxy service
        ServiceManager.add_service(base_name, client,
                                   params={'CLUSTER': cluster_name,
                                           'NODE_ID': node_name,
                                           'DATA_DIR': data_dir,
                                           'WAL_DIR': wal_dir,
                                           'SERVER_URL': EtcdInstaller.SERVER_URL.format(new_ip),
                                           'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(new_ip),
                                           'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                                           'INITIAL_CLUSTER': ','.join(current_cluster),
                                           'INITIAL_STATE': 'existing',
                                           'INITIAL_PEERS': ''},
                                   target_name=target_name)

        master_client = SSHClient(master_ip, username='******')
        master_client.run('etcdctl member add {0} {1}'.format(node_name, EtcdInstaller.SERVER_URL.format(new_ip)))
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name, client)

        logger.debug('Extending cluster "{0}" from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
    def create_config(self, cluster_name, ip, client_port, messaging_port, plugins=None):
        """
        Creates initial config object causing this host to be master
        :param cluster_name: unique name for this arakoon cluster used in paths
        :param ip: ip on which service should listen
        :param client_port:
        :param messaging_port:
        :param plugins: optional arakoon plugins
        :return:
        """

        client = SSHClient.load(ip)
        node_name = System.get_my_machine_id(client)
        base_dir = System.read_remote_config(client, 'ovs.core.db.arakoon.location')
        self.clear_config()
        self.config = ClusterConfig(base_dir, cluster_name, 'info', plugins)
        self.config.nodes.append(ClusterNode(node_name, ip, client_port, messaging_port))
        self.config.target_ip = ip
Example #37
def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(System.get_my_machine_id()))
    for endpoint in EtcdConfiguration.get('/ovs/framework/messagequeue|endpoints'):
        if endpoint.startswith(my_ip):
            return True
    return False
Example #38
    def create_cluster(cluster_name, ip):
        """
        Creates a cluster
        :param ip: IP address of the first node of the new cluster
        :param cluster_name: Name of the cluster
        """
        logger.debug('Creating cluster "{0}" on {1}'.format(cluster_name, ip))

        client = SSHClient(ip, username='******')
        node_name = System.get_my_machine_id(client)

        data_dir = EtcdInstaller.DATA_DIR.format(EtcdInstaller.DB_DIR,
                                                 cluster_name)
        wal_dir = EtcdInstaller.WAL_DIR.format(EtcdInstaller.DB_DIR,
                                               cluster_name)
        abs_paths = [data_dir, wal_dir]
        client.dir_delete(abs_paths)
        client.dir_create(abs_paths)
        client.dir_chmod(abs_paths, 0755, recursive=True)
        client.dir_chown(abs_paths, 'ovs', 'ovs', recursive=True)

        base_name = 'ovs-etcd'
        target_name = 'ovs-etcd-{0}'.format(cluster_name)
        ServiceManager.add_service(
            base_name,
            client,
            params={'CLUSTER': cluster_name,
                    'NODE_ID': node_name,
                    'DATA_DIR': data_dir,
                    'WAL_DIR': wal_dir,
                    'SERVER_URL': EtcdInstaller.SERVER_URL.format(ip),
                    'CLIENT_URL': EtcdInstaller.CLIENT_URL.format(ip),
                    'LOCAL_CLIENT_URL': EtcdInstaller.CLIENT_URL.format('127.0.0.1'),
                    'INITIAL_CLUSTER': '{0}={1}'.format(node_name, EtcdInstaller.SERVER_URL.format(ip)),
                    'INITIAL_STATE': 'new',
                    'INITIAL_PEERS': '-initial-advertise-peer-urls {0}'.format(EtcdInstaller.SERVER_URL.format(ip))},
            target_name=target_name)
        EtcdInstaller.start(cluster_name, client)
        EtcdInstaller.wait_for_cluster(cluster_name, client)

        logger.debug('Creating cluster "{0}" on {1} completed'.format(
            cluster_name, ip))
    def start(self, daemon=True):
        """
        start all nodes in the cluster
        """
        from ovs.extensions.db.arakoon.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark

        CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
        node_name = System.get_my_machine_id()
        self._start_one_ex(node_name, daemon)
    def _gatherlocalnodes(self, cluster):
        """ gather all localnodes for all clusters """

        localnodes = [System.get_my_machine_id()]  # cluster.listLocalNodes()
        CheckArakoonTlogMark._speak('Found local nodes {0}'.format(localnodes))

        for localnode in localnodes:
            self._localnodesfiles[localnode] = dict()
            self._localnodesfiles[localnode]['cluster'] = cluster
    def start(self, daemon=True):
        """
        start all nodes in the cluster
        """
        from ovs.extensions.db.arakoon.CheckArakoonTlogMark import CheckArakoonTlogMark

        CheckArakoonTlogMark().fixtlogs(self._clusterName, always_stop=True)
        node_name = System.get_my_machine_id()
        self._start_one_ex(node_name, daemon)
    def _gatherlocalnodes(self, cluster):
        """ gather all localnodes for all clusters """

        localnodes = [System.get_my_machine_id()]  # cluster.listLocalNodes()
        CheckArakoonTlogMark._speak('Found local nodes {0}'.format(localnodes))

        for localnode in localnodes:
            self._localnodesfiles[localnode] = dict()
            self._localnodesfiles[localnode]['cluster'] = cluster
Example #43
    def tick(self):
        """
        Runs one iteration of the scheduler. This is guarded with a distributed lock
        """
        self._has_lock = False
        try:
            logger.debug('DS executing tick')
            self._mutex.acquire(wait=10)
            node_now = current_app._get_current_object().now()
            node_timestamp = time.mktime(node_now.timetuple())
            node_name = System.get_my_machine_id()
            try:
                lock = self._persistent.get('{0}_lock'.format(self._namespace))
            except KeyNotFoundException:
                lock = None
            if lock is None:
                # There is no lock yet, so the lock is acquired
                self._has_lock = True
                logger.debug('DS there was no lock in tick')
            else:
                if lock['name'] == node_name:
                    # The current node holds the lock
                    logger.debug('DS keeps own lock')
                    self._has_lock = True
                elif node_timestamp - lock['timestamp'] > DistributedScheduler.TIMEOUT:
                    # The current lock is timed out, so the lock is stolen
                    logger.debug('DS last lock refresh is {0}s old'.format(
                        node_timestamp - lock['timestamp']))
                    logger.debug(
                        'DS stealing lock from {0}'.format(lock['name']))
                    self._load_schedule()
                    self._has_lock = True
                else:
                    logger.debug('DS lock is not ours')
            if self._has_lock is True:
                lock = {'name': node_name,
                        'timestamp': node_timestamp}
                logger.debug('DS refreshing lock')
                self._persistent.set('{0}_lock'.format(self._namespace), lock)
        finally:
            self._mutex.release()

        if self._has_lock is True:
            logger.debug('DS executing tick workload')
            remaining_times = []
            try:
                for entry in self.schedule.itervalues():
                    next_time_to_run = self.maybe_due(entry, self.publisher)
                    if next_time_to_run:
                        remaining_times.append(next_time_to_run)
            except RuntimeError:
                pass
            logger.debug('DS executing tick workload - done')
            return min(remaining_times + [self.max_interval])
        else:
            return self.max_interval
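The lock handling in tick() reduces to three cases: no lock stored, a lock this node already owns, or a lock whose last refresh is older than DistributedScheduler.TIMEOUT. A self-contained sketch of just that decision (should_take_lock is a hypothetical helper, not part of the scheduler):

import time

TIMEOUT = 60  # stand-in for DistributedScheduler.TIMEOUT (seconds)

def should_take_lock(lock, node_name, now, timeout=TIMEOUT):
    """Return True when this node may hold or refresh the distributed lock."""
    if lock is None:                            # no lock yet: acquire it
        return True
    if lock['name'] == node_name:               # we already hold it: keep it
        return True
    return now - lock['timestamp'] > timeout    # holder timed out: steal it

now = time.time()
assert should_take_lock(None, 'node-a', now) is True
assert should_take_lock({'name': 'node-a', 'timestamp': now}, 'node-a', now) is True
assert should_take_lock({'name': 'node-b', 'timestamp': now - 120}, 'node-a', now) is True
assert should_take_lock({'name': 'node-b', 'timestamp': now - 5}, 'node-a', now) is False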
Example #44
    def _create_vpool(self):
        """
        Needed to actually run tests on
        This is not actually a test of "Add Vpool to OVS",
        so any failure here will be reported as a setUp error and no tests will run
        """
        pmachine = System.get_my_storagerouter().pmachine
        mgmt_center = MgmtCenter(data={'name': 'Openstack',
                                       'description': 'test',
                                       'username': CINDER_USER,
                                       'password': CINDER_PASS,
                                       'ip': CINDER_CONTROLLER,
                                       'port': 80,
                                       'type': 'OPENSTACK',
                                       'metadata': {'integratemgmt': True}})
        mgmt_center.save()
        pmachine.mgmtcenter = mgmt_center
        pmachine.save()
        self._debug('Creating vpool')
        backend_type = 'local'
        fields = ['storage_ip', 'vrouter_port']

        parameters = {'storagerouter_ip': IP,
                      'vpool_name': VPOOL_NAME,
                      'type': 'local',
                      'mountpoint_bfs': VPOOL_BFS,
                      'mountpoint_temp': VPOOL_TEMP,
                      'mountpoint_md': VPOOL_MD,
                      'mountpoint_readcaches': [VPOOL_READCACHE],
                      'mountpoint_writecaches': [VPOOL_WRITECACHE],
                      'mountpoint_foc': VPOOL_FOC,
                      'storage_ip': '127.0.0.1', #KVM
                      'vrouter_port': VPOOL_PORT,
                      'integrate_vpool': True,
                      'connection_host': IP,
                      'connection_port': VPOOL_PORT,
                      'connection_username': '',
                      'connection_password': '',
                      'connection_backend': {},
                      }
        StorageRouterController.add_vpool(parameters)
        attempt = 0
        while attempt < 10:
            vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
            if vpool is not None:
                self._debug('vpool %s created' % VPOOL_NAME)
                try:
                    os.listdir(VPOOL_MOUNTPOINT)
                    return vpool
                except Exception as ex:
                    #either it doesn't exist, or we don't have permission
                    self._debug('vpool not ready yet %s' % (str(ex)))
                    pass
            attempt += 1
            time.sleep(2)
        raise RuntimeError('Vpool %s was not modeled correctly or did not start.' % VPOOL_NAME)
Example #45
def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    rmq_config = RawConfigParser()
    rmq_config.read(
        os.path.join(Configuration.get('ovs.core.cfgdir'),
                     'rabbitmqclient.cfg'))
    machine_id = System.get_my_machine_id()
    return rmq_config.has_section(machine_id)
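This check assumes rabbitmqclient.cfg contains one section per machine id that should run the event consumer. A small standalone illustration of that convention (the file layout shown is assumed, not copied from OVS):

# Sketch of the section-per-machine-id convention; the contents are made up.
try:
    from ConfigParser import RawConfigParser   # Python 2, as used in the codebase
except ImportError:
    from configparser import RawConfigParser   # Python 3 fallback

import tempfile

cfg_contents = """
[main]
nodes = abc123, def456

[abc123]
location = rack-1
"""

with tempfile.NamedTemporaryFile(mode='w', suffix='.cfg', delete=False) as handle:
    handle.write(cfg_contents)

rmq_config = RawConfigParser()
rmq_config.read(handle.name)               # the real code reads rabbitmqclient.cfg from ovs.core.cfgdir
print(rmq_config.has_section('abc123'))    # True  -> this machine id runs the event consumer
print(rmq_config.has_section('zzz999'))    # False -> it does not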
Example #46
    def _get_test_name():
        """
        Retrieve a structured environment test name

        :returns: a structured environment based test name
        :rtype: str
        """
        number_of_nodes = len(StoragerouterHelper.get_storagerouters())
        split_ip = System.get_my_storagerouter().ip.split('.')
        return str(number_of_nodes) + 'N-' + split_ip[2] + '.' + split_ip[3]
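For example, a three-node environment whose local storagerouter has IP 10.100.69.121 would be named '3N-69.121'. A standalone mirror of the naming scheme:

def structured_test_name(number_of_nodes, ip):
    """Mirror of the scheme above: '<node count>N-<third octet>.<fourth octet>'."""
    split_ip = ip.split('.')
    return str(number_of_nodes) + 'N-' + split_ip[2] + '.' + split_ip[3]

print(structured_test_name(3, '10.100.69.121'))  # 3N-69.121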
 def _get_free_ports(client):
     node_name = System.get_my_machine_id(client)
     clusters = []
     exclude_ports = []
     if EtcdConfiguration.dir_exists(ArakoonInstaller.ETCD_CONFIG_ROOT):
         for cluster_name in EtcdConfiguration.list(ArakoonInstaller.ETCD_CONFIG_ROOT):
             try:
                 config = ArakoonClusterConfig(cluster_name)
                 config.load_config()
                 for node in config.nodes:
                     if node.name == node_name:
                         clusters.append(cluster_name)
                         exclude_ports.append(node.client_port)
                         exclude_ports.append(node.messaging_port)
             except:
                 logger.error('  Could not load port information of cluster {0}'.format(cluster_name))
     ports = System.get_free_ports(EtcdConfiguration.get('/ovs/framework/hosts/{0}/ports|arakoon'.format(node_name)), exclude_ports, 2, client)
     logger.debug('  Loaded free ports {0} based on existing clusters {1}'.format(ports, clusters))
     return ports
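System.get_free_ports itself is not shown here; a rough, self-contained approximation of its contract (return n ports from a range, skipping excluded ports and ports that cannot be bound) might look like the following. This is purely illustrative and not the OVS implementation:

import socket

def get_free_ports(port_range, exclude_ports, amount):
    """Return `amount` locally bindable ports from `port_range`, skipping `exclude_ports`."""
    free = []
    for port in range(port_range[0], port_range[1] + 1):
        if port in exclude_ports:
            continue
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            sock.bind(('127.0.0.1', port))   # binding succeeds only if the port is free
            free.append(port)
        except socket.error:
            pass
        finally:
            sock.close()
        if len(free) == amount:
            return free
    raise RuntimeError('Not enough free ports in range {0}'.format(port_range))

print(get_free_ports((26400, 26499), exclude_ports=[26400, 26401], amount=2))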
Example #48
 def update_components(components):
     """
     Initiate the update through commandline for all StorageRouters
     This is called upon by the API
     :return: None
     """
     components = [component.strip() for component in components]
     root_client = SSHClient(endpoint=System.get_my_storagerouter(),
                             username='******')
     root_client.run(['ovs', 'update', ','.join(components)])
Example #49
    def _create_vpool(self):
        """
        Needed to actually run tests on
        This is not actually a test of "Add Vpool to OVS",
        so any failure here will be reported as a setUp error and no tests will run
        """
        pmachine = System.get_my_storagerouter().pmachine
        mgmt_center = MgmtCenter(
            data={
                "name": "Openstack",
                "description": "test",
                "username": CINDER_USER,
                "password": CINDER_PASS,
                "ip": CINDER_CONTROLLER,
                "port": 80,
                "type": "OPENSTACK",
                "metadata": {"integratemgmt": True},
            }
        )
        mgmt_center.save()
        pmachine.mgmtcenter = mgmt_center
        pmachine.save()
        self._debug("Creating vpool")

        parameters = {
            "storagerouter_ip": IP,
            "vpool_name": VPOOL_NAME,
            "type": "local",
            "storage_ip": "127.0.0.1",  # KVM
            "vrouter_port": VPOOL_PORT,
            "integrate_vpool": True,
            "connection_host": IP,
            "connection_port": VPOOL_PORT,
            "connection_username": "",
            "connection_password": "",
            "connection_backend": {},
            "readcache_size": 50,
            "writecache_size": 50,
        }
        StorageRouterController.add_vpool(parameters)
        attempt = 0
        while attempt < 10:
            vpool = VPoolList.get_vpool_by_name(VPOOL_NAME)
            if vpool is not None:
                self._debug("vpool {0} created".format(VPOOL_NAME))
                try:
                    os.listdir(VPOOL_MOUNTPOINT)
                    return vpool
                except Exception as ex:
                    # either it doesn't exist, or we don't have permission
                    self._debug("vpool not ready yet {0}".format(str(ex)))
                    pass
            attempt += 1
            time.sleep(2)
        raise RuntimeError("Vpool {0} was not modeled correctly or did not start.".format(VPOOL_NAME))
Example #50
    def create_cluster(cluster_name, ip, base_dir, plugins=None, locked=True):
        """
        Creates a cluster
        :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
        :param plugins: Plugins that should be added to the configuration file
        :param base_dir: Base directory that should contain the data and tlogs
        :param ip: IP address of the first node of the new cluster
        :param cluster_name: Name of the cluster
        """
        logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
        base_dir = base_dir.rstrip('/')

        client = SSHClient(ip)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(
            base_dir, cluster_name)
        log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(
            base_dir, cluster_name)

        ArakoonInstaller.archive_existing_arakoon_data(
            ip, home_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir),
            cluster_name)
        ArakoonInstaller.archive_existing_arakoon_data(
            ip, log_dir, ArakoonInstaller.ARAKOON_LOG_DIR.format(''),
            cluster_name)
        ArakoonInstaller.archive_existing_arakoon_data(
            ip, tlog_dir, ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir),
            cluster_name)
        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import VolatileMutex
                port_mutex = VolatileMutex(
                    'arakoon_install_ports_{0}'.format(ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            config = ArakoonClusterConfig(cluster_name, plugins)
            config.nodes.append(
                ArakoonNodeConfig(name=node_name,
                                  ip=ip,
                                  client_port=ports[0],
                                  messaging_port=ports[1],
                                  log_dir=log_dir,
                                  home=home_dir,
                                  tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            if port_mutex is not None:
                port_mutex.release()

        logger.debug('Creating cluster {0} on {1} completed'.format(
            cluster_name, ip))
        return {'client_port': ports[0], 'messaging_port': ports[1]}
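A hypothetical usage sketch, combining this with the extend_cluster variant shown in the next example. The import path, IPs, cluster name and base_dir are assumptions, and this only makes sense on a configured OVS node:

# Hypothetical usage on an OVS node; all values below are made up.
from ovs.extensions.db.arakoon.ArakoonInstaller import ArakoonInstaller  # assumed import path

first = ArakoonInstaller.create_cluster(cluster_name='mycluster',
                                        ip='10.100.1.1',
                                        base_dir='/mnt/db',
                                        plugins=None,
                                        locked=True)
second = ArakoonInstaller.extend_cluster(master_ip='10.100.1.1',
                                         new_ip='10.100.1.2',
                                         cluster_name='mycluster',
                                         base_dir='/mnt/db')
print(first)   # e.g. {'client_port': 26400, 'messaging_port': 26401}
print(second)  # the ports chosen on the second node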
Example #51
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir):
        """
        Extends a cluster to a given new node
        :param base_dir: Base directory that will hold the db and tlogs
        :param cluster_name: Name of the cluster to be extended
        :param new_ip: IP address of the node to be added
        :param master_ip: IP of one of the already existing nodes
        """
        logger.debug('Extending cluster {0} from {1} to {2}'.format(
            cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')
        from ovs.extensions.generic.volatilemutex import VolatileMutex
        port_mutex = VolatileMutex('arakoon_install_ports_{0}'.format(new_ip))

        config = ArakoonClusterConfig(cluster_name)
        config.load_config()

        client = SSHClient(new_ip)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(
            base_dir, cluster_name)
        log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(
            base_dir, cluster_name)

        ArakoonInstaller.archive_existing_arakoon_data(
            new_ip, home_dir,
            ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)
        ArakoonInstaller.archive_existing_arakoon_data(
            new_ip, log_dir, ArakoonInstaller.ARAKOON_LOG_DIR.format(''),
            cluster_name)
        ArakoonInstaller.archive_existing_arakoon_data(
            new_ip, tlog_dir,
            ArakoonInstaller.ARAKOON_BASE_DIR.format(base_dir), cluster_name)

        try:
            port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(
                    ArakoonNodeConfig(name=node_name,
                                      ip=new_ip,
                                      client_port=ports[0],
                                      messaging_port=ports[1],
                                      log_dir=log_dir,
                                      home=home_dir,
                                      tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            port_mutex.release()

        logger.debug('Extending cluster {0} from {1} to {2} completed'.format(
            cluster_name, master_ip, new_ip))
        return {'client_port': ports[0], 'messaging_port': ports[1]}
Example #52
def run_event_consumer():
    """
    Check whether to run the event consumer
    """
    my_ip = EtcdConfiguration.get('/ovs/framework/hosts/{0}/ip'.format(
        System.get_my_machine_id()))
    for endpoint in EtcdConfiguration.get(
            '/ovs/framework/messagequeue|endpoints'):
        if endpoint.startswith(my_ip):
            return True
    return False
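This variant simply checks whether any configured message-queue endpoint starts with this node's IP. The string check in isolation (sample values assumed):

def should_run_event_consumer(my_ip, endpoints):
    """True if one of the 'ip:port' endpoints lives on this node."""
    return any(endpoint.startswith(my_ip) for endpoint in endpoints)

endpoints = ['10.100.1.1:5672', '10.100.1.2:5672']
print(should_run_event_consumer('10.100.1.2', endpoints))  # True
print(should_run_event_consumer('10.100.1.3', endpoints))  # False

Note that a plain startswith match would also accept 10.100.1.10 when looking for 10.100.1.1; the original code shares that caveat.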
Example #53
    def _process_task(task, metadata, servicemanager):
        """
        Processes a task
        """
        try:
            SupportAgent._logger.debug('Processing: {0}'.format(task))
            cid = Configuration.get('/ovs/framework/cluster_id').replace(
                r"'", r"'\''")
            nid = System.get_my_machine_id().replace(r"'", r"'\''")

            if task == 'OPEN_TUNNEL':
                if servicemanager == 'upstart':
                    check_output('service openvpn stop', shell=True)
                else:
                    check_output(
                        "systemctl stop 'openvpn@ovs_{0}-{1}' || true".format(
                            cid, nid),
                        shell=True)
                check_output('rm -f /etc/openvpn/ovs_*', shell=True)
                for filename, contents in metadata['files'].iteritems():
                    with open(filename, 'w') as the_file:
                        the_file.write(base64.b64decode(contents))
                if servicemanager == 'upstart':
                    check_output('service openvpn start', shell=True)
                else:
                    check_output(
                        "systemctl start 'openvpn@ovs_{0}-{1}'".format(
                            cid, nid),
                        shell=True)
            elif task == 'CLOSE_TUNNEL':
                if servicemanager == 'upstart':
                    check_output('service openvpn stop', shell=True)
                else:
                    check_output("systemctl stop 'openvpn@ovs_{0}-{1}'".format(
                        cid, nid),
                                 shell=True)
                check_output('rm -f /etc/openvpn/ovs_*', shell=True)
            elif task == 'UPLOAD_LOGFILES':
                logfile = check_output('ovs collect logs', shell=True).strip()
                check_output(
                    "mv '{0}' '/tmp/{1}'; curl -T '/tmp/{1}' 'ftp://{2}' --user '{3}:{4}'; rm -f '{0}' '/tmp/{1}'"
                    .format(logfile.replace(r"'", r"'\''"),
                            metadata['filename'].replace(r"'", r"'\''"),
                            metadata['endpoint'].replace(r"'", r"'\''"),
                            metadata['user'].replace(r"'", r"'\''"),
                            metadata['password'].replace(r"'", r"'\''")),
                    shell=True)
            else:
                raise RuntimeError('Unknown task')
        except Exception, ex:
            SupportAgent._logger.exception(
                'Unexpected error while processing task {0} (data: {1}): {2}'.
                format(task, json.dumps(metadata), ex))
            raise
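The repeated .replace(r"'", r"'\''") calls implement the standard trick for embedding a value inside single quotes in a shell command: close the quote, emit an escaped literal quote, reopen the quote. A standalone illustration (sh_single_quote is a hypothetical helper; the format strings above already provide the surrounding quotes, so only the replace is needed there):

import subprocess

def sh_single_quote(value):
    """Wrap value in single quotes for safe interpolation into a shell=True command."""
    return "'" + value.replace("'", "'\\''") + "'"

filename = "it's a log.txt"
print(sh_single_quote(filename))   # 'it'\''s a log.txt'
print(subprocess.check_output('echo ' + sh_single_quote(filename), shell=True))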
 def get_healthcheck_version():
     """
     Gets the installed healthcheck version
     :return: version number of the installed healthcheck
     :rtype: str
     """
     client = SSHClient(System.get_my_storagerouter())
     package_name = 'openvstorage-health-check'
     package_manager = PackageFactory.get_manager()
     packages = package_manager.get_installed_versions(
         client=client, package_names=[package_name])
     return packages.get(package_name, 'unknown')
Example #55
    def install_plugins():
        """
        (Re)load plugins
        """
        if ServiceManager.has_service('ovs-watcher-framework',
                                      SSHClient('127.0.0.1', username='******')):
            # If the watcher is running, 'ovs setup' was executed and we need to restart everything to load
            # the plugin. In the other case, the plugin will be loaded once 'ovs setup' is executed
            from ovs.dal.lists.storagerouterlist import StorageRouterList
            clients = []
            try:
                for storagerouter in StorageRouterList.get_storagerouters():
                    clients.append(SSHClient(storagerouter, username='******'))
            except UnableToConnectException:
                raise RuntimeError('Not all StorageRouters are reachable')

            for client in clients:
                for service_name in ['watcher-framework', 'memcached']:
                    ServiceManager.stop_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is False:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not stop service: {0}'.format(service_name))

            for client in clients:
                for service_name in ['memcached', 'watcher-framework']:
                    ServiceManager.start_service(service_name, client=client)
                    wait = 30
                    while wait > 0:
                        if ServiceManager.get_service_status(
                                service_name, client=client) is True:
                            break
                        time.sleep(1)
                        wait -= 1
                    if wait == 0:
                        raise RuntimeError(
                            'Could not start service: {0}'.format(
                                service_name))

            from ovs.dal.helpers import Migration
            Migration.migrate()

            from ovs.lib.helpers.toolbox import Toolbox
            ip = System.get_my_storagerouter().ip
            functions = Toolbox.fetch_hooks('plugin', 'postinstall')
            for function in functions:
                function(ip=ip)
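The stop and start loops above poll ServiceManager.get_service_status for up to 30 seconds. The underlying pattern is a generic wait-until helper; a sketch (wait_until is hypothetical, not part of OVS):

import time

def wait_until(predicate, timeout=30, interval=1):
    """Poll predicate() once per interval; True as soon as it holds, False on timeout."""
    remaining = timeout
    while remaining > 0:
        if predicate():
            return True
        time.sleep(interval)
        remaining -= interval
    return False

# Example: wait for a flag that flips after roughly 3 seconds.
deadline = time.time() + 3
print(wait_until(lambda: time.time() >= deadline, timeout=10))  # True

With such a helper each loop collapses to a single wait_until(...) call around the status check.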
Example #56
    def __init__(self, path=None, client=None):
        """

        :param path: path of the fstab file
        :type path: str
        """
        if path:
            self._path = path
        else:
            self._path = self.DEFAULT_PATH
        if client is None:
            client = SSHClient(System.get_my_storagerouter(), username='******')
        self.client = client