Example #1
 def _backend_property(self, function, dynamic):
     """
     Handles the internal caching of dynamic properties
     """
     caller_name = dynamic.name
     cache_key = '{0}_{1}'.format(self._key, caller_name)
     mutex = volatile_mutex(cache_key)
     try:
         cached_data = self._volatile.get(cache_key)
         if cached_data is None:
             if dynamic.locked:
                 mutex.acquire()
                 cached_data = self._volatile.get(cache_key)
             if cached_data is None:
                 function_info = inspect.getargspec(function)
                 if 'dynamic' in function_info.args:
                     cached_data = function(dynamic=dynamic)  # Load data from backend
                 else:
                     cached_data = function()
                 if cached_data is not None:
                     correct, allowed_types, given_type = Toolbox.check_type(cached_data, dynamic.return_type)
                     if not correct:
                         raise TypeError('Dynamic property {0} allows types {1}. {2} given'.format(
                             caller_name, str(allowed_types), given_type
                         ))
                 if dynamic.timeout > 0:
                     self._volatile.set(cache_key, cached_data, dynamic.timeout)
         return cached_data
     finally:
         mutex.release()
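
The pattern above is double-checked locking against the volatile cache: one unlocked read for the common hit path, then a second read under the mutex before the expensive backend call (the lock is skipped when dynamic.locked is False). A minimal standalone sketch of the same pattern, assuming only that volatile_mutex supports the context-manager protocol (as later examples demonstrate) and that cache is a get/set client such as the one returned by VolatileFactory.get_client():

from ovs.extensions.generic.volatilemutex import volatile_mutex

def cached_load(cache, cache_key, loader, timeout=60):
    # Unlocked fast path: most calls should hit the cache.
    data = cache.get(cache_key)
    if data is None:
        with volatile_mutex(cache_key):
            # Re-check under the lock: another worker may have filled
            # the cache while we were waiting to acquire the mutex.
            data = cache.get(cache_key)
            if data is None:
                data = loader()  # hypothetical expensive backend call
                if data is not None and timeout > 0:
                    cache.set(cache_key, data, timeout)
    return data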
Example #2
    def pulse():
        """
        Update the heartbeats for the Current Routers
        :return: None
        """
        logger = LogHandler.get('extensions', name='heartbeat')
        machine_id = System.get_my_machine_id()
        current_time = int(time.time())

        routers = StorageRouterList.get_storagerouters()
        for node in routers:
            if node.machine_id == machine_id:
                with volatile_mutex('storagerouter_heartbeat_{0}'.format(node.guid)):
                    node_save = StorageRouter(node.guid)
                    node_save.heartbeats['process'] = current_time
                    node_save.save()
                StorageRouterController.ping.s(node.guid, current_time).apply_async(routing_key='sr.{0}'.format(machine_id))
            else:
                try:
                    # check timeout of other nodes and clear arp cache
                    if node.heartbeats and 'process' in node.heartbeats:
                        if current_time - node.heartbeats['process'] >= HeartBeat.ARP_TIMEOUT:
                            check_output("/usr/sbin/arp -d '{0}'".format(node.name.replace(r"'", r"'\''")), shell=True)
                except CalledProcessError:
                    logger.exception('Error clearing ARP cache')
Example #3
    def get_unused_arakoon_metadata_and_claim(cluster_type, locked=True):
        """
        Retrieve arakoon cluster information based on its type
        :param cluster_type: Type of arakoon cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str

        :param locked: Execute this in a locked context
        :type locked: bool

        :return: An unused ArakoonClusterMetadata object that has been claimed, or None when none is available
        :rtype: ArakoonClusterMetadata
        """
        cluster_type = cluster_type.upper()
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Unsupported arakoon cluster type provided. Please choose from {0}'.format(', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
        if not EtcdConfiguration.dir_exists('/ovs/arakoon'):
            return None

        mutex = volatile_mutex('claim_arakoon_metadata', wait=10)
        try:
            if locked is True:
                mutex.acquire()

            for cluster_name in EtcdConfiguration.list('/ovs/arakoon'):
                metadata = ArakoonClusterMetadata(cluster_id=cluster_name)
                metadata.load_metadata()
                if metadata.cluster_type == cluster_type and metadata.in_use is False and metadata.internal is False:
                    metadata.claim()
                    return metadata
        finally:
            if locked is True:
                mutex.release()
Example #4
        def new_function(*args, **kwargs):
            """
            Wrapped function
            """
            request = _find_request(args)

            now = time.time()
            key = 'ovs_api_limit_{0}.{1}_{2}'.format(
                f.__module__, f.__name__,
                request.META['HTTP_X_REAL_IP']
            )
            client = VolatileFactory.get_client()
            with volatile_mutex(key):
                rate_info = client.get(key, {'calls': [],
                                             'timeout': None})
                active_timeout = rate_info['timeout']
                if active_timeout is not None:
                    if active_timeout > now:
                        logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, active_timeout - now))
                        raise HttpTooManyRequestsException(error='rate_limit_timeout',
                                                           error_description='Rate limit timeout ({0}s remaining)'.format(round(active_timeout - now, 2)))
                    else:
                        rate_info['timeout'] = None
                rate_info['calls'] = [call for call in rate_info['calls'] if call > (now - per)] + [now]
                calls = len(rate_info['calls'])
                if calls > amount:
                    rate_info['timeout'] = now + timeout
                    client.set(key, rate_info)
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
                    raise HttpTooManyRequestsException(error='rate_limit_reached',
                                                       error_description='Rate limit reached ({0} in last {1}s)'.format(calls, per))
                client.set(key, rate_info)
            return f(*args, **kwargs)
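
new_function closes over f, amount, per and timeout, which come from an enclosing decorator factory. A minimal sketch of what such a factory could look like; the name limit and its signature are assumptions for illustration, not the framework's actual API:

def limit(amount, per, timeout):
    """
    Hypothetical decorator factory: allow at most `amount` calls per
    `per` seconds from one client IP, then throttle for `timeout` seconds.
    """
    def wrapper(f):
        def new_function(*args, **kwargs):
            # ... rate-limiting body as in the example above ...
            return f(*args, **kwargs)
        new_function.__name__ = f.__name__
        return new_function
    return wrapper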
Example #5
 def update_value(key, append, value_to_update=None):
     """
     Store the specified value in the PersistentFactory
     :param key:             Key to store the value for
     :param append:          If True, the specified value will be appended; otherwise the element at index 0 will be popped
     :param value_to_update: Value to append to the list or remove from the list
     :return:                Updated value
     """
     with volatile_mutex(name=key, wait=5):
         if persistent_client.exists(key):
             val = persistent_client.get(key)
             if append is True and value_to_update is not None:
                 val['values'].append(value_to_update)
             elif append is False and value_to_update is not None:
                 for value_item in val['values']:
                     if value_item == value_to_update:
                         val['values'].remove(value_item)
                         break
             elif append is False and len(val['values']) > 0:
                 val['values'].pop(0)
         else:
             log_message('Setting initial value for key {0}'.format(key))
             val = {'mode': mode, 'values': []}
         persistent_client.set(key, val)
     return val
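
persistent_client, mode and log_message are free variables resolved from the enclosing scope. A hypothetical usage sketch; the key name and payload shape are invented for illustration:

update_value('ensure_single_my_task', append=True, value_to_update={'kwargs': {'vpool_guid': '123'}})
update_value('ensure_single_my_task', append=True, value_to_update={'kwargs': {'vpool_guid': '456'}})
update_value('ensure_single_my_task', append=False)  # pops the oldest ('123') entry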
Example #6
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str

        :param new_ip: IP address of the node to be added
        :type new_ip: str

        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str

        :param base_dir: Base directory that will hold the db and tlogs
        :type base_dir: str

        :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')

        config = ArakoonClusterConfig(cluster_name)
        config.load_config()

        client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(new_ip, {log_dir: True,
                                                              home_dir: False,
                                                              tlog_dir: False})

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                      ip=new_ip,
                                                      client_port=ports[0],
                                                      messaging_port=ports[1],
                                                      log_dir=log_dir,
                                                      home=home_dir,
                                                      tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
        return {'client_port': ports[0],
                'messaging_port': ports[1]}
Example #7
    def get_unused_arakoon_metadata_and_claim(cluster_type, locked=True):
        """
        Retrieve arakoon cluster information based on its type
        :param cluster_type: Type of arakoon cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str
        :param locked: Execute this in a locked context
        :type locked: bool
        :return: Metadata of the arakoon cluster
        :rtype: dict
        """
        cluster_type = cluster_type.upper()
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Unsupported arakoon cluster type provided. Please choose from {0}'.format(', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))
        if not Configuration.dir_exists(ArakoonInstaller.CONFIG_ROOT):
            return None

        mutex = volatile_mutex('claim_arakoon_metadata', wait=10)
        try:
            if locked is True:
                mutex.acquire()

            for cluster_name in Configuration.list(ArakoonInstaller.CONFIG_ROOT):
                config = ArakoonClusterConfig(cluster_id=cluster_name, filesystem=False)
                config.load_config()
                arakoon_client = ArakoonInstaller.build_client(config)
                if arakoon_client.exists(ArakoonInstaller.METADATA_KEY):
                    metadata = json.loads(arakoon_client.get(ArakoonInstaller.METADATA_KEY))
                    if metadata['cluster_type'] == cluster_type and metadata['in_use'] is False and metadata['internal'] is False:
                        metadata['in_use'] = True
                        arakoon_client.set(ArakoonInstaller.METADATA_KEY, json.dumps(metadata, indent=4))
                        return metadata
        finally:
            if locked is True:
                mutex.release()
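
The mutex makes the read-check-claim sequence atomic cluster-wide; without it, two concurrent callers could both observe in_use == False and claim the same cluster. A hypothetical call, assuming 'FWK' is one of the entries in ServiceType.ARAKOON_CLUSTER_TYPES and that the method is exposed as a staticmethod on ArakoonInstaller:

metadata = ArakoonInstaller.get_unused_arakoon_metadata_and_claim(cluster_type='fwk')
if metadata is None:
    raise RuntimeError('No unused arakoon cluster of type FWK available')
print('Claimed cluster of type {0}'.format(metadata['cluster_type']))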
Example #8
 def update_value(key, append, value_to_update=None):
     """
     Store the specified value in the PersistentFactory
     :param key:             Key to store the value for
     :param append:          If True, the specified value will be appended; otherwise the element at index 0 will be popped
     :param value_to_update: Value to append to the list or remove from the list
     :return:                Updated value
     """
     with volatile_mutex(name=key, wait=5):
         if persistent_client.exists(key):
             val = persistent_client.get(key)
             if append is True and value_to_update is not None:
                 val['values'].append(value_to_update)
             elif append is False and value_to_update is not None:
                 for value_item in val['values']:
                     if value_item == value_to_update:
                         val['values'].remove(value_item)
                         break
             elif append is False and len(val['values']) > 0:
                 val['values'].pop(0)
             log_message('Amount of jobs pending for key {0}: {1}'.format(key, len(val['values'])))
             for kwarg in val['values']:
                 log_message('  KWARGS: {0}'.format(kwarg['kwargs']))
         else:
             log_message('Setting initial value for key {0}'.format(key))
             val = {'mode': mode,
                    'values': []}
         persistent_client.set(key, val)
     return val
Example #9
 def wrapped(*args, **kwargs):
     if lock_type == 'local':
         _mutex = file_mutex(key)
     elif lock_type == 'cluster':
         _mutex = volatile_mutex(key)
     else:
         raise ValueError(
             'Lock type {0} is not supported!'.format(lock_type))
     try:
         _mutex.acquire(wait=0.005)
         local_sr = System.get_my_storagerouter()
         CacheHelper.set(key=key,
                         item={
                             'ip': local_sr.ip,
                             'hostname': local_sr.name
                         },
                         expire_time=60)
         return func(*args, **kwargs)
     except (NoFileLockAvailableException,
             NoVolatileLockAvailableException):
         if callback is None:
             return
         else:
             executor_info = None
             start = time.time()
             while executor_info is None:
                 # Calculated guesswork: if a callback is expected, the acquire has already happened for another executor, so the volatile key should eventually be set.
                 # However, since the key is set after the acquire, the callback executor and the original method executor can race between fetch and set.
                 # A better implementation would rely on the fwk ensure_single decorator, as it checks for these races itself.
                 # This is just a poor man's, temporary implementation.
                 if time.time() - start > 5:
                     raise ValueError(
                         'Timed out after 5 seconds while fetching the information about the executor.'
                     )
                 try:
                     executor_info = CacheHelper.get(key=key)
                 except Exception:
                     pass
             callback_func = callback.__func__ if isinstance(
                 callback, staticmethod) else callback
             argnames = inspect.getargspec(callback_func)[0]
             arguments = list(args)
             kwargs.update({'test_name': func.__name__})
             if executor_info is not None:
                 kwargs.update(executor_info)
                 if 'result_handler' in argnames:
                     result_handler = kwargs.get('result_handler')
                     for index, arg in enumerate(arguments):
                         if isinstance(arg,
                                       HCResults.HCResultCollector):
                             result_handler = arguments.pop(index)
                             break
                     if result_handler is None:
                         raise TypeError(
                             'Expected an instance of {0}'.format(
                                 HCResults.HCResultCollector))
                     kwargs['result_handler'] = result_handler
             return callback_func(*tuple(arguments), **kwargs)
     finally:
         _mutex.release()
Example #10
def _clean_cache():
    ovs_logger.info('Executing celery "clear_cache" startup script...')
    from ovs.lib.helpers.decorators import ENSURE_SINGLE_KEY
    active = inspect().active()
    active_tasks = []
    if active is not None:
        for tasks in active.itervalues():
            active_tasks += [task['id'] for task in tasks]
    cache = PersistentFactory.get_client()
    for key in cache.prefix(ENSURE_SINGLE_KEY):
        try:
            with volatile_mutex(name=key, wait=5):
                entry = cache.get(key)
                values = entry.get('values', [])
                new_values = []
                for v in values:
                    task_id = v.get('task_id')
                    if task_id is not None and task_id in active_tasks:
                        new_values.append(v)
                if len(new_values) > 0:
                    entry['values'] = new_values
                    cache.set(key, entry)
                    ovs_logger.info('Updated key {0}'.format(key))
                else:
                    cache.delete(key)
                    ovs_logger.info('Deleted key {0}'.format(key))
        except KeyNotFoundException:
            pass
    ovs_logger.info('Executing celery "clear_cache" startup script... done')
Example #11
 def _unregister_task(self, task_data=None, delete=False):
     # type: (dict, bool) -> None
     """
     Unregisters the execution of the task
     :param task_data: Dict containing all information about the task to unregister
     :type task_data: dict
     :param delete: Delete the registration key
     :type delete: bool
     :return: None
     """
     # @todo use transaction
     with volatile_mutex(self.persistent_key, wait=5):
         if task_data:
             current_registrations, initial_registrations = self.get_task_registrations()
             try:
                 current_registrations.remove(task_data)
                 self.persistent_client.set(self.task_registration_key,
                                            current_registrations)
             except ValueError:
                 # Registration was already removed
                 pass
         if delete:
             self.logger.info(self.message.format(
                 'Deleting key {0}'.format(self.task_registration_key)))
             self.persistent_client.delete(self.task_registration_key,
                                           must_exist=False)
Example #12
 def new_function(self, request, *args, **kwargs):
     """
     Wrapped function
     """
     now = time.time()
     key = 'ovs_api_limit_{0}.{1}_{2}'.format(
         f.__module__, f.__name__,
         request.META['HTTP_X_REAL_IP']
     )
     client = VolatileFactory.get_client()
     mutex = volatile_mutex(key)
     try:
         mutex.acquire()
         rate_info = client.get(key, {'calls': [],
                                      'timeout': None})
         active_timeout = rate_info['timeout']
         if active_timeout is not None:
             if active_timeout > now:
                 logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, active_timeout - now))
                 return HttpResponse, {'error_code': 'rate_limit_timeout',
                                       'error': 'Rate limit timeout ({0}s remaining)'.format(round(active_timeout - now, 2))}, 429
             else:
                 rate_info['timeout'] = None
         rate_info['calls'] = [call for call in rate_info['calls'] if call > (now - per)] + [now]
         calls = len(rate_info['calls'])
         if calls > amount:
             rate_info['timeout'] = now + timeout
             client.set(key, rate_info)
             logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
             return HttpResponse, {'error_code': 'rate_limit_reached',
                                   'error': 'Rate limit reached ({0} in last {1}s)'.format(calls, per)}, 429
         client.set(key, rate_info)
     finally:
         mutex.release()
     return f(self, request, *args, **kwargs)
Example #13
        def new_function(*args, **kwargs):
            """
            Wrapped function
            """
            request = _find_request(args)

            now = time.time()
            key = 'ovs_api_limit_{0}.{1}_{2}'.format(
                f.__module__, f.__name__,
                request.META['HTTP_X_REAL_IP']
            )
            client = VolatileFactory.get_client()
            with volatile_mutex(key):
                rate_info = client.get(key, {'calls': [],
                                             'timeout': None})
                active_timeout = rate_info['timeout']
                if active_timeout is not None:
                    if active_timeout > now:
                        logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, active_timeout - now))
                        raise Throttled(wait=active_timeout - now)
                    else:
                        rate_info['timeout'] = None
                rate_info['calls'] = [call for call in rate_info['calls'] if call > (now - per)] + [now]
                calls = len(rate_info['calls'])
                if calls > amount:
                    rate_info['timeout'] = now + timeout
                    client.set(key, rate_info)
                    logger.warning('Call {0} is being throttled with a wait of {1}'.format(key, timeout))
                    raise Throttled(wait=timeout)
                client.set(key, rate_info)
            return f(*args, **kwargs)
Example #14
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str

        :param new_ip: IP address of the node to be added
        :type new_ip: str

        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str

        :param base_dir: Base directory that will hold the db and tlogs
        :type base_dir: str

        :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')

        config = ArakoonClusterConfig(cluster_name)
        config.load_config()

        client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [home_dir, tlog_dir])

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                      ip=new_ip,
                                                      client_port=ports[0],
                                                      messaging_port=ports[1],
                                                      log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                      crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                      home=home_dir,
                                                      tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
        return {'client_port': ports[0],
                'messaging_port': ports[1]}
Example #15
 def __init__(self, *args, **kwargs):
     """
     Initializes the distributed scheduler
     """
     self._logger = LogHandler.get('celery', name='celery beat')
     self._persistent = PersistentFactory.get_client()
     self._namespace = 'ovs_celery_beat'
     self._mutex = volatile_mutex('celery_beat', 10)
     self._has_lock = False
     super(DistributedScheduler, self).__init__(*args, **kwargs)
     self._logger.debug('DS init')
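
The 'celery_beat' mutex (with what appears to be a default wait of 10 seconds) is what serializes the distributed beat processes. A hypothetical helper showing how such a mutex would typically guard writes to shared scheduler state; this is an illustration, not the scheduler's real implementation:

def _save_schedule_entries(self, entries):
    # Hypothetical helper: only one beat process at a time may write the
    # shared schedule, so serialize on the cluster-wide mutex from __init__.
    try:
        self._mutex.acquire()
        self._persistent.set('{0}_entries'.format(self._namespace), entries)
    finally:
        self._mutex.release()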
Example #16
    def create(self):
        """
        Prepares a new Storagedriver for a given vPool and Storagerouter
        :return: None
        :rtype: NoneType
        """
        if self.sr_installer is None:
            raise RuntimeError('No StorageRouterInstaller instance found')

        machine_id = System.get_my_machine_id(client=self.sr_installer.root_client)
        port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
        storagerouter = self.sr_installer.storagerouter
        with volatile_mutex('add_vpool_get_free_ports_{0}'.format(machine_id), wait=30):
            model_ports_in_use = []
            for sd in StorageDriverList.get_storagedrivers():
                if sd.storagerouter_guid == storagerouter.guid:
                    model_ports_in_use += sd.ports.values()
                    for proxy in sd.alba_proxies:
                        model_ports_in_use.append(proxy.service.ports[0])
            ports = System.get_free_ports(selected_range=port_range, exclude=model_ports_in_use, amount=4 + self.sr_installer.requested_proxies, client=self.sr_installer.root_client)

            vpool = self.vp_installer.vpool
            vrouter_id = '{0}{1}'.format(vpool.name, machine_id)
            storagedriver = StorageDriver()
            storagedriver.name = vrouter_id.replace('_', ' ')
            storagedriver.ports = {'management': ports[0],
                                   'xmlrpc': ports[1],
                                   'dtl': ports[2],
                                   'edge': ports[3]}
            storagedriver.vpool = vpool
            storagedriver.cluster_ip = Configuration.get('/ovs/framework/hosts/{0}/ip'.format(machine_id))
            storagedriver.storage_ip = self.storage_ip
            storagedriver.mountpoint = '/mnt/{0}'.format(vpool.name)
            storagedriver.description = storagedriver.name
            storagedriver.storagerouter = storagerouter
            storagedriver.storagedriver_id = vrouter_id
            storagedriver.save()

            # ALBA Proxies
            proxy_service_type = ServiceTypeList.get_by_name(ServiceType.SERVICE_TYPES.ALBA_PROXY)
            for proxy_id in xrange(self.sr_installer.requested_proxies):
                service = Service()
                service.storagerouter = storagerouter
                service.ports = [ports[4 + proxy_id]]
                service.name = 'albaproxy_{0}_{1}'.format(vpool.name, proxy_id)
                service.type = proxy_service_type
                service.save()
                alba_proxy = AlbaProxy()
                alba_proxy.service = service
                alba_proxy.storagedriver = storagedriver
                alba_proxy.save()
        self.storagedriver = storagedriver
Example #17
 def __init__(self, *args, **kwargs):
     """
     Initializes the distributed scheduler
     """
     self._mutex = volatile_mutex('celery_beat', 10)
     self._logger = Logger('celery')
     self._has_lock = False
     self._lock_name = 'ovs_celery_beat_lock'
     self._entry_name = 'ovs_celery_beat_entries'
     self._persistent = PersistentFactory.get_client()
     self._schedule_info = {}
     super(DistributedScheduler, self).__init__(*args, **kwargs)
     self._logger.debug('DS init')
Example #18
 def new_function(*args, **kw):
     """
     Executes the decorated function in a locked context
     """
     filemutex = file_mutex('messaging')
     try:
         filemutex.acquire(wait=60)
         mutex = volatile_mutex('messaging')
         try:
             mutex.acquire(wait=60)
             return f(*args, **kw)
         finally:
             mutex.release()
     finally:
         filemutex.release()
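
This double lock combines a node-local file lock with a cluster-wide volatile lock. The same structure can be written with context managers, assuming file_mutex, like volatile_mutex, implements __enter__/__exit__ (only volatile_mutex is shown used that way in these examples) and that the default acquisition timeouts are acceptable:

def new_function(*args, **kw):
    """
    Executes the decorated function in a locked context
    """
    with file_mutex('messaging'):          # serializes within this node
        with volatile_mutex('messaging'):  # serializes across the cluster
            return f(*args, **kw)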
Example #19
 def ping(storagerouter_guid, timestamp):
     """
     Update a StorageRouter's celery heartbeat
     :param storagerouter_guid: Guid of the StorageRouter to update
     :type storagerouter_guid: str
     :param timestamp: Timestamp to compare to
     :type timestamp: float
     :return: None
     :rtype: NoneType
     """
     with volatile_mutex(
             'storagerouter_heartbeat_{0}'.format(storagerouter_guid)):
         storagerouter = StorageRouter(storagerouter_guid)
         if timestamp > storagerouter.heartbeats.get('celery', 0):
             storagerouter.heartbeats['celery'] = timestamp
             storagerouter.save()
Example #20
    def lock_default_mode(self):
        # type: () -> None
        """
        Lock function racing for default ensure single mode
        Checks all possible task names
        :raises: EnsureSingleTaskDiscarded: If the task was discarded
        :raises: EnsureSingleDoCallBack: If the task is required to call the callback function instead of the task function
        """
        # @todo instead of volatile mutex, do asserts within transaction
        # Acquire
        try:
            self.run_hook('before_validation')
            with volatile_mutex(self.persistent_key, wait=5):
                for task in self.ensure_single_container.task_names:
                    key_to_check = self.generate_key_for_task_with_mode(task)
                    if self.persistent_client.exists(key_to_check):
                        # Raising errors, as potentially long-running functions should not be invoked in the acquire phase
                        if self.async_task or not self.ensure_single_container.callback:
                            self.discard_task()
                        else:
                            self.callback_task()
            self.logger.info(
                self.message.format('Setting key {0}'.format(
                    self.persistent_key)))
            self._register_task({'task_id': self.task_id})
        finally:
            self.run_hook('after_validation')

        success = True
        self.unittest_set_state_executing()
        self.run_hook('before_execution')
        try:
            yield
        # Release
        except:
            success = False
            raise
        finally:
            if success:
                self.unittest_set_state_finished()
                self.logger.info(
                    self.message.format(
                        'Task {0} finished successfully'.format(
                            self.ensure_single_container.task_name)))
            # Release
            self._unregister_task(delete=True)
            self.run_hook('after_execution')
Example #21
 def wrapped(*args, **kwargs):
     if lock_type == 'local':
         _mutex = file_mutex(key)
     elif lock_type == 'cluster':
         _mutex = volatile_mutex(key)
     else:
         raise ValueError('Lock type {0} is not supported!'.format(lock_type))
     try:
         _mutex.acquire(wait=0.005)
         local_sr = System.get_my_storagerouter()
         CacheHelper.set(key=key, item={'ip': local_sr.ip, 'hostname': local_sr.name}, expire_time=60)
         return func(*args, **kwargs)
     except (NoFileLockAvailableException, NoVolatileLockAvailableException):
         if callback is None:
             return
         else:
             executor_info = None
             start = time.time()
             while executor_info is None:
                 # Calculated guesswork: if a callback is expected, the acquire has already happened for another executor, so the volatile key should eventually be set.
                 # However, since the key is set after the acquire, the callback executor and the original method executor can race between fetch and set.
                 # A better implementation would rely on the fwk ensure_single decorator, as it checks for these races itself.
                 # This is just a poor man's, temporary implementation.
                 if time.time() - start > 5:
                     raise ValueError('Timed out after 5 seconds while fetching the information about the executor.')
                 try:
                     executor_info = CacheHelper.get(key=key)
                 except Exception:
                     pass
             callback_func = callback.__func__ if isinstance(callback, staticmethod) else callback
             argnames = inspect.getargspec(callback_func)[0]
             arguments = list(args)
             kwargs.update({'test_name': func.__name__})
             if executor_info is not None:
                 kwargs.update(executor_info)
                 if 'result_handler' in argnames:
                     result_handler = kwargs.get('result_handler')
                     for index, arg in enumerate(arguments):
                         if isinstance(arg, HCResults.HCResultCollector):
                             result_handler = arguments.pop(index)
                             break
                     if result_handler is None:
                         raise TypeError('Expected an instance of {0}'.format(HCResults.HCResultCollector))
                     kwargs['result_handler'] = result_handler
             return callback_func(**kwargs)
     finally:
         _mutex.release()
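
wrapped is the inner function of a decorator factory; key, lock_type, callback and func are its free variables. A hypothetical sketch of the enclosing factory; the name run_in_locked_context and its signature are invented for illustration:

def run_in_locked_context(key, lock_type='local', callback=None):
    """
    Hypothetical decorator factory around the `wrapped` shown above: run
    `func` under a node-local or cluster-wide lock, falling back to
    `callback` when the lock is already held elsewhere.
    """
    def decorator(func):
        def wrapped(*args, **kwargs):
            pass  # lock/acquire/fallback body as in the example above
        return wrapped
    return decorator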
Example #22
 def _register_task(self, registration_data):
     # type: (any) -> list
     """
     Registers the execution of the task
     :param registration_data: Dict containing all arguments as key word arguments
     :type registration_data: any
     :return: All registrations
     :rtype: list
     """
     # @todo use transactions instead
     with volatile_mutex(self.persistent_key, wait=5):
         current_registrations, initial_registrations = self.get_task_registrations()
         current_registrations.append(registration_data)
         self.persistent_client.set(self.task_registration_key,
                                    current_registrations)
     return current_registrations
Example #23
 def invalidate_dynamics(self, properties=None):
     """
     Invalidates all dynamic property caches. Use with caution, as this action can introduce
     a short performance hit.
     :param properties: Properties to invalidate
     """
     if properties is not None and not isinstance(properties, list):
         properties = [properties]
     for dynamic in self._dynamics:
         if properties is None or dynamic.name in properties:
             key = '{0}_{1}'.format(self._key, dynamic.name)
             mutex = volatile_mutex(key)
             try:
                 if dynamic.locked:
                     mutex.acquire()
                 self._volatile.delete(key)
             finally:
                 mutex.release()
Example #24
    def update_vmachine_name(instance_id, old_name, new_name):
        """
        Update a vMachine name: find vmachine by management center instance id, set new name
        :param instance_id: ID for the virtual machine known by management center
        :param old_name: Old name of the virtual machine
        :param new_name: New name for the virtual machine
        """
        vmachine = None
        for mgmt_center in MgmtCenterList.get_mgmtcenters():
            mgmt = Factory.get_mgmtcenter(mgmt_center=mgmt_center)
            try:
                machine_info = mgmt.get_vmachine_device_info(instance_id)
                file_name = machine_info['file_name']
                host_name = machine_info['host_name']
                vpool_name = machine_info['vpool_name']
                storage_router = StorageRouterList.get_by_name(host_name)
                machine_id = storage_router.machine_id
                device_name = '{0}/{1}'.format(machine_id, file_name)
                vp = VPoolList.get_vpool_by_name(vpool_name)
                vmachine = VMachineList.get_by_devicename_and_vpool(device_name, vp)
                if vmachine:
                    break
                vmachine = VMachineList.get_by_devicename_and_vpool(device_name, None)
                if vmachine:
                    break
            except Exception as ex:
                VMachineController._logger.info('Trying to get mgmt center failed for vmachine {0}. {1}'.format(old_name, ex))
        if not vmachine:
            VMachineController._logger.error('No vmachine found for name {0}'.format(old_name))
            return

        vpool = vmachine.vpool
        mutex = volatile_mutex('{0}_{1}'.format(old_name, vpool.guid if vpool is not None else 'none'))
        try:
            mutex.acquire(wait=5)
            vmachine.name = new_name
            vmachine.save()
        finally:
            mutex.release()
Example #25
    def create_cluster(cluster_name, cluster_type, ip, base_dir, plugins=None, locked=True, internal=True, claim=False):
        """
        Always creates a cluster but marks its usage according to the internal flag
        :param cluster_name: Name of the cluster
        :type cluster_name: str

        :param cluster_type: Type of the cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str

        :param ip: IP address of the first node of the new cluster
        :type ip: str

        :param base_dir: Base directory that should contain the data and tlogs
        :type base_dir: str

        :param plugins: Plugins that should be added to the configuration file
        :type plugins: list

        :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :param internal: Is cluster internally managed by OVS
        :type internal: bool

        :param claim: Claim the cluster right away
        :type claim: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Cluster type {0} is not supported. Please choose from {1}'.format(cluster_type, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))

        if EtcdConfiguration.dir_exists('/ovs/arakoon/{0}'.format(cluster_name)):
            raise ValueError('An Arakoon cluster with name "{0}" already exists'.format(cluster_name))

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
        base_dir = base_dir.rstrip('/')

        client = SSHClient(ip, username=ArakoonInstaller.SSHCLIENT_USER)
        if ArakoonInstaller.is_running(cluster_name, client):
            ArakoonInstaller._logger.info('Arakoon service running for cluster {0}'.format(cluster_name))
            config = ArakoonClusterConfig(cluster_name, plugins)
            config.load_config()
            for node in config.nodes:
                if node.ip == ip:
                    return {'client_port': node.client_port,
                            'messaging_port': node.messaging_port}

        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(ip, [home_dir, tlog_dir])

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            config = ArakoonClusterConfig(cluster_name, plugins)
            config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                  ip=ip,
                                                  client_port=ports[0],
                                                  messaging_port=ports[1],
                                                  log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                  crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                  home=home_dir,
                                                  tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)

            metadata = ArakoonClusterMetadata(cluster_id=cluster_name)
            metadata.internal = internal
            metadata.cluster_type = cluster_type.upper()
            metadata.write()
            if claim is True:
                metadata.claim()
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
        return {'metadata': metadata,
                'client_port': ports[0],
                'messaging_port': ports[1]}
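
The create call above only takes the port mutex when locked is True, so two concurrent installers on the same IP cannot be handed the same free ports. A minimal sketch of that conditional-locking pattern, assuming the volatile_mutex API used above (acquire(wait=...) / release()); allocate_ports is a hypothetical stand-in for ArakoonInstaller._get_free_ports:

from ovs.extensions.generic.volatilemutex import volatile_mutex

def allocate_with_optional_lock(ip, allocate_ports, locked=True):
    port_mutex = None
    try:
        if locked is True:
            # Serialize port allocation per IP so two installers cannot
            # both pick the same free ports
            port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(ip))
            port_mutex.acquire(wait=60)
        return allocate_ports()
    finally:
        if port_mutex is not None:
            port_mutex.release()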
Example #37
    def __init__(self,
                 guid=None,
                 data=None,
                 datastore_wins=False,
                 volatile=False,
                 _hook=None):
        """
        Loads an object with a given guid. If no guid is given, a new object
        is generated with a new guid.
        * guid: The guid indicating which object should be loaded
        * datastore_wins: Optional boolean indicating save conflict resolution behaviour.
        ** True: when saving, external modified fields will not be saved
        ** False: when saving, all changed data will be saved, regardless of external updates
        ** None: in case changed fields were also changed externally, an error will be raised
        """

        # Initialize super class
        super(DataObject, self).__init__()

        # Initialize internal fields
        self._frozen = False
        self._datastore_wins = datastore_wins
        self._guid = None  # Guid identifier of the object
        self._original = {}  # Original data copy
        self._metadata = {}  # Some metadata, mainly used for unit testing
        self._data = {}  # Internal data storage
        self._objects = {}  # Internal objects storage

        # Initialize public fields
        self.dirty = False
        self.volatile = volatile

        # Worker fields/objects
        self._classname = self.__class__.__name__.lower()

        # Rebuild _relation types
        hybrid_structure = HybridRunner.get_hybrids()
        for relation in self._relations:
            if relation.foreign_type is not None:
                identifier = Descriptor(relation.foreign_type).descriptor['identifier']
                if identifier in hybrid_structure and identifier != hybrid_structure[identifier]['identifier']:
                    relation.foreign_type = Descriptor().load(hybrid_structure[identifier]).get_object()

        # Init guid
        self._new = False
        if guid is None:
            self._guid = str(uuid.uuid4())
            self._new = True
        else:
            self._guid = str(guid)

        # Build base keys
        self._key = '{0}_{1}_{2}'.format(DataObject.NAMESPACE, self._classname, self._guid)

        # Worker mutexes
        self._mutex_version = volatile_mutex('ovs_dataversion_{0}_{1}'.format(self._classname, self._guid))

        # Load data from cache or persistent backend where appropriate
        self._volatile = VolatileFactory.get_client()
        self._persistent = PersistentFactory.get_client()
        self._metadata['cache'] = None
        if self._new:
            self._data = {}
        else:
            if data is not None:
                self._data = copy.deepcopy(data)
                self._metadata['cache'] = None
            else:
                self._data = self._volatile.get(self._key)
                if self._data is None:
                    self._metadata['cache'] = False
                    try:
                        self._data = self._persistent.get(self._key)
                    except KeyNotFoundException:
                        raise ObjectNotFoundException('{0} with guid \'{1}\' could not be found'.format(
                            self.__class__.__name__, self._guid))
                else:
                    self._metadata['cache'] = True

        # Set default values on new fields
        for prop in self._properties:
            if prop.name not in self._data:
                self._data[prop.name] = prop.default
            self._add_property(prop)

        # Load relations
        for relation in self._relations:
            if relation.name not in self._data:
                if relation.foreign_type is None:
                    cls = self.__class__
                else:
                    cls = relation.foreign_type
                self._data[relation.name] = Descriptor(cls).descriptor
            self._add_relation_property(relation)

        # Add wrapped properties
        for dynamic in self._dynamics:
            self._add_dynamic_property(dynamic)

        # Load foreign keys
        relations = RelationMapper.load_foreign_relations(self.__class__)
        if relations is not None:
            for key, info in relations.iteritems():
                self._objects[key] = {'info': info, 'data': None}
                self._add_list_property(key, info['list'])

        if _hook is not None and hasattr(_hook, '__call__'):
            _hook()

        if not self._new:
            # Re-cache the object, if required
            if self._metadata['cache'] is False:
                # The data wasn't loaded from the cache, so caching is required now
                try:
                    self._mutex_version.acquire(30)
                    this_version = self._data['_version']
                    store_version = self._persistent.get(self._key)['_version']
                    if this_version == store_version:
                        self._volatile.set(self._key, self._data)
                except KeyNotFoundException:
                    raise ObjectNotFoundException('{0} with guid \'{1}\' could not be found'.format(
                        self.__class__.__name__, self._guid))
                finally:
                    self._mutex_version.release()

        # Freeze property creation
        self._frozen = True

        # Optionally, initialize some fields
        if data is not None:
            for prop in self._properties:
                if prop.name in data:
                    setattr(self, prop.name, data[prop.name])

        # Store original data
        self._original = copy.deepcopy(self._data)
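
The re-cache step above only writes the object back to the volatile store when the persistent version still matches the data that was loaded, so a save that happened in the meantime is never overwritten with stale data. A minimal sketch of that guard, assuming the same volatile_mutex API; the volatile and persistent arguments are hypothetical stand-ins for the factory clients used above:

from ovs.extensions.generic.volatilemutex import volatile_mutex

def recache_if_unchanged(key, data, volatile, persistent):
    mutex = volatile_mutex('ovs_dataversion_{0}'.format(key))
    try:
        mutex.acquire(30)
        # Only re-cache when nobody stored a newer version in the meantime
        if data['_version'] == persistent.get(key)['_version']:
            volatile.set(key, data)
    finally:
        mutex.release()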
Example #38
        def new_function(self, *args, **kwargs):
            """
            Wrapped function
            :param self: With bind=True, the Celery task instance itself is passed in as the first argument
            :param args: Arguments without default values
            :param kwargs: Arguments with default values
            """
            def log_message(message, level='info'):
                """
                Log a message with some additional information
                :param message: Message to log
                :param level:   Log level
                :return:        None
                """
                if level not in ('info', 'warning', 'debug', 'error', 'exception'):
                    raise ValueError('Unsupported log level "{0}" specified'.format(level))
                if unittest_mode is False:
                    complete_message = 'Ensure single {0} mode - ID {1} - {2}'.format(mode, now, message)
                else:
                    complete_message = 'Ensure single {0} mode - ID {1} - {2} - {3}'.format(mode, now, threading.current_thread().getName(), message)
                getattr(logger, level)(complete_message)

            def update_value(key, append, value_to_update=None):
                """
                Store the specified value in the PersistentFactory
                :param key:             Key to store the value for
                :param append:          If True, append value_to_update to the list; if False, remove value_to_update (or pop the element at index 0 when no value is given)
                :param value_to_update: Value to append to the list or remove from the list
                :return:                Updated value
                """
                with volatile_mutex(name=key, wait=5):
                    vals = list(persistent_client.get_multi([key], must_exist=False))
                    if vals[0] is not None:
                        val = vals[0]
                        if append is True and value_to_update is not None:
                            val['values'].append(value_to_update)
                        elif append is False and value_to_update is not None:
                            for value_item in val['values']:
                                if value_item == value_to_update:
                                    val['values'].remove(value_item)
                                    break
                        elif append is False and len(val['values']) > 0:
                            val['values'].pop(0)
                    else:
                        log_message('Setting initial value for key {0}'.format(key))
                        val = {'mode': mode,
                               'values': []}
                    persistent_client.set(key, val)
                return val

            if not hasattr(self, 'request'):
                raise RuntimeError('The decorator ensure_single can only be applied to bound tasks (with bind=True argument)')

            now = '{0}_{1}'.format(int(time.time()), ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)))
            task_id = self.request.id
            async_task = task_id is not None  # Async tasks have an ID, inline executed tasks have None as ID
            task_names = [task_name] if extra_task_names is None else [task_name] + extra_task_names
            thread_name = threading.current_thread().getName()
            unittest_mode = os.environ.get('RUNNING_UNITTESTS') == 'True'
            persistent_key = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task_name)
            persistent_client = PersistentFactory.get_client()

            if mode == 'DEFAULT':
                with volatile_mutex(persistent_key, wait=5):
                    for task in task_names:
                        key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                        if persistent_client.exists(key_to_check):
                            if async_task is True or callback is None:
                                log_message('Execution of task {0} discarded'.format(task_name))
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('DISCARDED', None)
                                return None
                            else:
                                log_message('Execution of task {0} in progress, executing callback function'.format(task_name))
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('CALLBACK', None)
                                return callback(*args, **kwargs)

                    log_message('Setting key {0}'.format(persistent_key))
                    persistent_client.set(persistent_key, {'mode': mode,
                                                           'values': [{'task_id': task_id}]})

                try:
                    if unittest_mode is True:
                        Decorators.unittest_thread_info_by_name[thread_name] = ('EXECUTING', None)
                    output = f(*args, **kwargs)
                    if unittest_mode is True:
                        Decorators.unittest_thread_info_by_name[thread_name] = ('FINISHED', None)
                        Decorators.unittest_thread_info_by_state['FINISHED'].append(thread_name)
                    log_message('Task {0} finished successfully'.format(task_name))
                    return output
                finally:
                    with volatile_mutex(persistent_key, wait=5):
                        log_message('Deleting key {0}'.format(persistent_key))
                        persistent_client.delete(persistent_key, must_exist=False)

            elif mode == 'DEDUPED':
                if extra_task_names is not None:
                    log_message('Extra tasks are not allowed in this mode',
                                level='error')
                    raise ValueError('Ensure single {0} mode - ID {1} - Extra tasks are not allowed in this mode'.format(mode, now))

                # Update kwargs with args
                sleep = 1 if unittest_mode is False else 0.1
                timeout = kwargs.pop('ensure_single_timeout', 10 if unittest_mode is True else global_timeout)
                function_info = inspect.getargspec(f)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed
                job_counter = 0
                for item in value['values']:
                    if item['kwargs'] == kwargs_dict:
                        job_counter += 1
                        if job_counter == 2:  # 1st job with same params is being executed, 2nd is scheduled for execution ==> Discard current
                            if async_task is True:  # Not waiting for other jobs to finish since the task runs asynchronously
                                log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('DISCARDED', None)
                                return None

                            # If executed inline (sync), execute callback if any provided
                            if callback is not None:
                                log_message('Execution of task {0} {1} in progress, executing callback function'.format(task_name, params_info))
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('CALLBACK', None)
                                return callback(*args, **kwargs)

                            # Let's wait for 2nd job in queue to have finished if no callback provided
                            slept = 0
                            while slept < timeout:
                                log_message('Task {0} {1} is waiting for similar tasks to finish - ({2})'.format(task_name, params_info, slept + sleep))
                                values = list(persistent_client.get_multi([persistent_key], must_exist=False))
                                if values[0] is None:
                                    if unittest_mode is True:
                                        Decorators.unittest_thread_info_by_name[thread_name] = ('WAITED', None)
                                    return None  # All pending jobs have been deleted in the meantime, no need to wait
                                if item['timestamp'] not in [value['timestamp'] for value in values[0]['values']]:
                                    if unittest_mode is True:
                                        Decorators.unittest_thread_info_by_name[thread_name] = ('WAITED', None)
                                    return None  # Similar tasks have been executed, so sync task currently waiting can return without having been executed
                                slept += sleep
                                time.sleep(sleep)
                                if slept >= timeout:
                                    log_message('Task {0} {1} waited {2}s for similar tasks to finish, but timeout was reached'.format(task_name, params_info, slept),
                                                level='error')
                                    if unittest_mode is True:
                                        Decorators.unittest_thread_info_by_name[thread_name] = ('EXCEPTION', 'Could not start within timeout of {0}s while waiting for other tasks'.format(timeout))
                                    raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                                     now,
                                                                                                                                                                     task_name,
                                                                                                                                                                     timeout))
                                if unittest_mode is True:
                                    if thread_name not in Decorators.unittest_thread_info_by_state['WAITING']:
                                        Decorators.unittest_thread_info_by_state['WAITING'].append(thread_name)

                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict,
                                              'task_id': task_id,
                                              'timestamp': now})

                # Poll the arakoon to see whether this call is the only one in the list; if so, execute, else wait
                slept = 0
                while slept < timeout:
                    values = list(persistent_client.get_multi([persistent_key], must_exist=False))
                    if values[0] is not None:
                        queued_jobs = [v for v in values[0]['values'] if v['kwargs'] == kwargs_dict]
                        if len(queued_jobs) != 1:
                            if unittest_mode is True:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('WAITING', None)
                                if thread_name not in Decorators.unittest_thread_info_by_state['WAITING']:
                                    Decorators.unittest_thread_info_by_state['WAITING'].append(thread_name)
                        else:
                            try:
                                if slept != 0:
                                    log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                         params_info,
                                                                                                                         slept))
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('EXECUTING', None)
                                output = f(*args, **kwargs)
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('FINISHED', None)
                                    Decorators.unittest_thread_info_by_state['FINISHED'].append(thread_name)
                                log_message('Task {0} finished successfully'.format(task_name))
                                return output
                            finally:
                                update_value(key=persistent_key,
                                             append=False,
                                             value_to_update=queued_jobs[0])
                    slept += sleep
                    time.sleep(sleep)
                    if slept >= timeout:
                        update_value(key=persistent_key,
                                     append=False,
                                     value_to_update={'kwargs': kwargs_dict,
                                                      'task_id': task_id,
                                                      'timestamp': now})
                        log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                    level='error')
                        if unittest_mode is True:
                            Decorators.unittest_thread_info_by_name[thread_name] = ('EXCEPTION', 'Could not start within timeout of {0}s while queued'.format(timeout))
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                         now,
                                                                                                                                                         task_name,
                                                                                                                                                         timeout))

            elif mode == 'CHAINED':
                if extra_task_names is not None:
                    log_message('Extra tasks are not allowed in this mode',
                                level='error')
                    raise ValueError('Ensure single {0} mode - ID {1} - Extra tasks are not allowed in this mode'.format(mode, now))

                # Update kwargs with args
                sleep = 1 if unittest_mode is False else 0.1
                timeout = kwargs.pop('ensure_single_timeout', 10 if unittest_mode is True else global_timeout)
                function_info = inspect.getargspec(f)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed, skip if so
                for item in value['values'][1:]:  # 1st element is processing job, we check all other queued jobs for identical params
                    if item['kwargs'] == kwargs_dict:
                        if async_task is True:  # Not waiting for other jobs to finish since the task runs asynchronously
                            log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                            if unittest_mode is True:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('DISCARDED', None)
                            return None

                        # If executed inline (sync), execute callback if any provided
                        if callback is not None:
                            log_message('Execution of task {0} {1} in progress, executing callback function'.format(task_name, params_info))
                            if unittest_mode is True:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('CALLBACK', None)
                            return callback(*args, **kwargs)

                        # Let's wait for 2nd job in queue to have finished if no callback provided
                        slept = 0
                        while slept < timeout:
                            log_message('Task {0} {1} is waiting for similar tasks to finish - ({2})'.format(task_name, params_info, slept + sleep))
                            values = list(persistent_client.get_multi([persistent_key], must_exist=False))
                            if values[0] is None:
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('WAITED', None)
                                return None  # All pending jobs have been deleted in the meantime, no need to wait
                            if item['timestamp'] not in [value['timestamp'] for value in values[0]['values']]:
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('WAITED', None)
                                return None  # Similar tasks have been executed, so sync task currently waiting can return without having been executed
                            slept += sleep
                            time.sleep(sleep)
                            if slept >= timeout:
                                log_message('Task {0} {1} waited {2}s for similar tasks to finish, but timeout was reached'.format(task_name, params_info, slept),
                                            level='error')
                                if unittest_mode is True:
                                    Decorators.unittest_thread_info_by_name[thread_name] = ('EXCEPTION', 'Could not start within timeout of {0}s while waiting for other tasks'.format(timeout))
                                raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                                 now,
                                                                                                                                                                 task_name,
                                                                                                                                                                 timeout))
                            if unittest_mode is True:
                                if thread_name not in Decorators.unittest_thread_info_by_state['WAITING']:
                                    Decorators.unittest_thread_info_by_state['WAITING'].append(thread_name)

                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict,
                                              'task_id': task_id,
                                              'timestamp': now})

                # Poll the arakoon to see whether this call is the first in the list; if so, execute, else wait
                first_element = None
                slept = 0
                while slept < timeout:
                    values = list(persistent_client.get_multi([persistent_key], must_exist=False))
                    if values[0] is not None:
                        value = values[0]
                        first_element = value['values'][0]['timestamp'] if len(value['values']) > 0 else None

                    if first_element == now:
                        try:
                            if slept > 0:
                                log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                     params_info,
                                                                                                                     slept))
                            if unittest_mode is True:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('EXECUTING', None)
                            output = f(*args, **kwargs)
                            if unittest_mode is True:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('FINISHED', None)
                                Decorators.unittest_thread_info_by_state['FINISHED'].append(thread_name)
                            log_message('Task {0} finished successfully'.format(task_name))
                            return output
                        finally:
                            update_value(key=persistent_key,
                                         append=False)
                    else:
                        if unittest_mode is True:
                            if thread_name not in Decorators.unittest_thread_info_by_state['WAITING']:
                                Decorators.unittest_thread_info_by_name[thread_name] = ('WAITING', None)
                                Decorators.unittest_thread_info_by_state['WAITING'].append(thread_name)

                    slept += sleep
                    time.sleep(sleep)
                    if slept >= timeout:
                        update_value(key=persistent_key,
                                     append=False,
                                     value_to_update={'kwargs': kwargs_dict,
                                                      'task_id': task_id,
                                                      'timestamp': now})
                        log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                    level='error')
                        if unittest_mode is True:
                            Decorators.unittest_thread_info_by_name[thread_name] = ('EXCEPTION', 'Could not start within timeout of {0}s while queued'.format(timeout))
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                         now,
                                                                                                                                                         task_name,
                                                                                                                                                         timeout))
            else:
                raise ValueError('Unsupported mode "{0}" provided'.format(mode))
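
Taken together, the wrapper above is what a decorated, bound Celery task runs through. A hedged usage sketch follows; the exact decorator signature is an assumption based on the names referenced above (task_name, mode, callback), bind=True is required by the wrapper, and ensure_single_timeout is popped from the kwargs by the wrapper itself:

# Hypothetical usage sketch; celery (the app) and ensure_single are assumed
# to be importable in this context, and the decorator signature is an assumption
@celery.task(bind=True, name='ovs.demo.rebalance')
@ensure_single(task_name='ovs.demo.rebalance', mode='DEDUPED')
def rebalance(self, vpool_guid):
    pass  # actual work goes here

# Callers can bound the queueing time; the wrapper pops this kwarg itself
rebalance.delay(vpool_guid='...', ensure_single_timeout=300)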
Example #39
    def __init__(self, guid=None, data=None, datastore_wins=False, volatile=False, _hook=None):
        """
        Loads an object with a given guid. If no guid is given, a new object
        is generated with a new guid.
        * guid: The guid indicating which object should be loaded
        * datastore_wins: Optional boolean indicating save conflict resolution behaviour.
        ** True: when saving, external modified fields will not be saved
        ** False: when saving, all changed data will be saved, regardless of external updates
        ** None: in case changed fields were also changed externally, an error will be raised
        """

        # Initialize super class
        super(DataObject, self).__init__()

        # Initialize internal fields
        self._frozen = False
        self._datastore_wins = datastore_wins
        self._guid = None    # Guid identifier of the object
        self._original = {}  # Original data copy
        self._metadata = {}  # Some metadata, mainly used for unit testing
        self._data = {}      # Internal data storage
        self._objects = {}   # Internal objects storage

        # Initialize public fields
        self.dirty = False
        self.volatile = volatile

        # Worker fields/objects
        self._classname = self.__class__.__name__.lower()

        # Rebuild _relation types
        hybrid_structure = HybridRunner.get_hybrids()
        for relation in self._relations:
            if relation.foreign_type is not None:
                identifier = Descriptor(relation.foreign_type).descriptor['identifier']
                if identifier in hybrid_structure and identifier != hybrid_structure[identifier]['identifier']:
                    relation.foreign_type = Descriptor().load(hybrid_structure[identifier]).get_object()

        # Init guid
        self._new = False
        if guid is None:
            self._guid = str(uuid.uuid4())
            self._new = True
        else:
            self._guid = str(guid)

        # Build base keys
        self._key = '{0}_{1}_{2}'.format(DataObject.NAMESPACE, self._classname, self._guid)

        # Worker mutexes
        self._mutex_version = volatile_mutex('ovs_dataversion_{0}_{1}'.format(self._classname, self._guid))

        # Load data from cache or persistent backend where appropriate
        self._volatile = VolatileFactory.get_client()
        self._persistent = PersistentFactory.get_client()
        self._metadata['cache'] = None
        if self._new:
            self._data = {}
        else:
            if data is not None:
                self._data = copy.deepcopy(data)
                self._metadata['cache'] = None
            else:
                self._data = self._volatile.get(self._key)
                if self._data is None:
                    self._metadata['cache'] = False
                    try:
                        self._data = self._persistent.get(self._key)
                    except KeyNotFoundException:
                        raise ObjectNotFoundException('{0} with guid \'{1}\' could not be found'.format(
                            self.__class__.__name__, self._guid
                        ))
                else:
                    self._metadata['cache'] = True

        # Set default values on new fields
        for prop in self._properties:
            if prop.name not in self._data:
                self._data[prop.name] = prop.default
            self._add_property(prop)

        # Load relations
        for relation in self._relations:
            if relation.name not in self._data:
                if relation.foreign_type is None:
                    cls = self.__class__
                else:
                    cls = relation.foreign_type
                self._data[relation.name] = Descriptor(cls).descriptor
            self._add_relation_property(relation)

        # Add wrapped properties
        for dynamic in self._dynamics:
            self._add_dynamic_property(dynamic)

        # Load foreign keys
        relations = RelationMapper.load_foreign_relations(self.__class__)
        if relations is not None:
            for key, info in relations.iteritems():
                self._objects[key] = {'info': info,
                                      'data': None}
                self._add_list_property(key, info['list'])

        if _hook is not None and hasattr(_hook, '__call__'):
            _hook()

        if not self._new:
            # Re-cache the object, if required
            if self._metadata['cache'] is False:
                # The data wasn't loaded from the cache, so caching is required now
                try:
                    self._mutex_version.acquire(30)
                    this_version = self._data['_version']
                    store_version = self._persistent.get(self._key)['_version']
                    if this_version == store_version:
                        self._volatile.set(self._key, self._data)
                except KeyNotFoundException:
                    raise ObjectNotFoundException('{0} with guid \'{1}\' could not be found'.format(
                        self.__class__.__name__, self._guid
                    ))
                finally:
                    self._mutex_version.release()

        # Freeze property creation
        self._frozen = True

        # Optionally, initialize some fields
        if data is not None:
            for prop in self._properties:
                if prop.name in data:
                    setattr(self, prop.name, data[prop.name])

        # Store original data
        self._original = copy.deepcopy(self._data)
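
For reference, the conflict-resolution flag documented above maps onto construction like this. A hedged sketch; MyObject stands in for any DataObject subclass and the guid for an existing object:

def construction_examples(MyObject, guid):
    # MyObject is a hypothetical DataObject subclass; guid identifies an existing object
    fresh = MyObject()                                 # no guid: new object with a fresh uuid4
    loaded = MyObject(guid=guid)                       # guid: loaded from cache or the persistent backend
    theirs = MyObject(guid=guid, datastore_wins=True)  # externally modified fields win on save
    ours = MyObject(guid=guid, datastore_wins=False)   # local changes win on save, regardless of external updates
    strict = MyObject(guid=guid, datastore_wins=None)  # concurrent changes raise an error on save
    return fresh, loaded, theirs, ours, strict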
Example #40
    def clone(machineguid, timestamp, name):
        """
        Clone a vmachine using the disk snapshot based on a snapshot timestamp

        @param machineguid: guid of the machine to clone
        @param timestamp: timestamp of the disk snapshots to use for the clone
        @param name: name for the new machine
        """
        machine = VMachine(machineguid)
        timestamp = str(timestamp)
        if timestamp not in (snap['timestamp'] for snap in machine.snapshots):
            raise RuntimeError('Invalid timestamp provided, not a valid snapshot of this vmachine.')

        vpool = None
        storagerouter = None
        if machine.pmachine is not None and machine.pmachine.hvtype == 'VMWARE':
            for vdisk in machine.vdisks:
                if vdisk.vpool is not None:
                    vpool = vdisk.vpool
                    break
        for vdisk in machine.vdisks:
            if vdisk.storagerouter_guid:
                storagerouter = StorageRouter(vdisk.storagerouter_guid)
                break
        hv = Factory.get(machine.pmachine)
        vm_path = hv.get_vmachine_path(name, storagerouter.machine_id if storagerouter is not None else '')
        # mutex in sync_with_hypervisor uses "None" for KVM hvtype
        mutex = volatile_mutex('{0}_{1}'.format(hv.clean_vmachine_filename(vm_path), vpool.guid if vpool is not None else 'none'))

        disks = {}
        for snapshot in machine.snapshots:
            if snapshot['timestamp'] == timestamp:
                for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                    disks[diskguid] = snapshotguid

        try:
            mutex.acquire(wait=120)
            new_machine = VMachine()
            new_machine.copy(machine)
            new_machine.name = name
            new_machine.devicename = hv.clean_vmachine_filename(vm_path)
            new_machine.pmachine = machine.pmachine
            new_machine.save()
        finally:
            mutex.release()

        new_disk_guids = []
        vm_disks = []
        mountpoint = None
        disks_by_order = sorted(machine.vdisks, key=lambda x: x.order)
        try:
            for currentDisk in disks_by_order:
                if machine.is_vtemplate and currentDisk.templatesnapshot:
                    snapshotid = currentDisk.templatesnapshot
                else:
                    snapshotid = disks[currentDisk.guid]
                prefix = '{0}-clone'.format(currentDisk.name)

                result = VDiskController.clone(diskguid=currentDisk.guid,
                                               snapshotid=snapshotid,
                                               devicename=prefix,
                                               pmachineguid=new_machine.pmachine_guid,
                                               machinename=new_machine.name,
                                               machineguid=new_machine.guid)
                new_disk_guids.append(result['diskguid'])
                mountpoint = StorageDriverList.get_by_storagedriver_id(currentDisk.storagedriver_id).mountpoint
                vm_disks.append(result)
        except Exception as ex:
            VMachineController._logger.error('Failed to clone disks. {0}'.format(ex))
            VMachineController.delete(machineguid=new_machine.guid)
            raise

        try:
            result = hv.clone_vm(machine.hypervisor_id, name, vm_disks, mountpoint)
        except Exception as ex:
            VMachineController._logger.error('Failed to clone vm. {0}'.format(ex))
            VMachineController.delete(machineguid=new_machine.guid)
            raise

        try:
            mutex.acquire(wait=120)
            new_machine.hypervisor_id = result
            new_machine.save()
        finally:
            mutex.release()
        return new_machine.guid
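
The clone above serializes its VMachine saves against sync_with_hypervisor by deriving the identical mutex name from the cleaned device name and the vPool guid, with 'none' standing in when no vPool applies (as on KVM). A minimal sketch of that shared naming convention, assuming the same volatile_mutex API:

from ovs.extensions.generic.volatilemutex import volatile_mutex

def vmachine_sync_mutex(devicename, vpool_guid=None):
    # Both clone and sync_with_hypervisor must derive the same name,
    # with 'none' standing in when no vPool is involved
    return volatile_mutex('{0}_{1}'.format(devicename, vpool_guid if vpool_guid is not None else 'none'))

mutex = vmachine_sync_mutex('machine1.vmx')
try:
    mutex.acquire(wait=120)
    # create or update the VMachine object here
finally:
    mutex.release()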
Example #41
        def new_function(*args, **kwargs):
            """
            Wrapped function
            :param args: Arguments without default values
            :param kwargs: Arguments with default values
            """
            def log_message(message, level='info'):
                """
                Log a message with some additional information
                :param message: Message to log
                :param level:   Log level
                :return:        None
                """
                if level not in ('info', 'warning', 'debug', 'error'):
                    raise ValueError('Unsupported log level "{0}" specified'.format(level))
                complete_message = 'Ensure single {0} mode - ID {1} - {2}'.format(mode, now, message)
                getattr(logger, level)(complete_message)

            def update_value(key, append, value_to_update=None):
                """
                Store the specified value in the PersistentFactory
                :param key:             Key to store the value for
                :param append:          If True, append value_to_update to the list; if False, remove value_to_update (or pop the element at index 0 when no value is given)
                :param value_to_update: Value to append to the list or remove from the list
                :return:                Updated value
                """
                with volatile_mutex(name=key, wait=5):
                    if persistent_client.exists(key):
                        val = persistent_client.get(key)
                        if append is True and value_to_update is not None:
                            val['values'].append(value_to_update)
                        elif append is False and value_to_update is not None:
                            for value_item in val['values']:
                                if value_item == value_to_update:
                                    val['values'].remove(value_item)
                                    break
                        elif append is False and len(val['values']) > 0:
                            val['values'].pop(0)
                    else:
                        log_message('Setting initial value for key {0}'.format(key))
                        val = {'mode': mode, 'values': []}
                    persistent_client.set(key, val)
                return val

            now = '{0}_{1}'.format(int(time.time()), ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)))
            task_names = [task_name] if extra_task_names is None else [task_name] + extra_task_names
            persistent_key = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task_name)
            persistent_client = PersistentFactory.get_client()

            if mode == 'DEFAULT':
                with volatile_mutex(persistent_key, wait=5):
                    for task in task_names:
                        key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                        if persistent_client.exists(key_to_check):
                            log_message('Execution of task {0} discarded'.format(task_name))
                            return None
                    log_message('Setting key {0}'.format(persistent_key))
                    persistent_client.set(persistent_key, {'mode': mode})

                try:
                    output = function(*args, **kwargs)
                    log_message('Task {0} finished successfully'.format(task_name))
                    return output
                finally:
                    with volatile_mutex(persistent_key, wait=5):
                        if persistent_client.exists(persistent_key):
                            log_message('Deleting key {0}'.format(persistent_key))
                            persistent_client.delete(persistent_key)
            elif mode == 'DEDUPED':
                with volatile_mutex(persistent_key, wait=5):
                    if extra_task_names is not None:
                        for task in extra_task_names:
                            key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                            if persistent_client.exists(key_to_check):
                                log_message('Execution of task {0} discarded'.format(task_name))
                                return None
                    log_message('Setting key {0}'.format(persistent_key))

                # Update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key, append=True)

                # Validate whether another job with same params is being executed
                job_counter = 0
                for item in value['values']:
                    if item['kwargs'] == kwargs_dict:
                        job_counter += 1
                        if job_counter == 2:  # 1st job with same params is being executed, 2nd is scheduled for execution ==> Discard current
                            log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                            return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict})

                # Poll the arakoon to see whether this call is the only one in the list; if so, execute, else wait
                counter = 0
                while counter < timeout:
                    if persistent_client.exists(persistent_key):
                        values = persistent_client.get(persistent_key)['values']
                        queued_jobs = [v for v in values if v['kwargs'] == kwargs_dict]
                        if len(queued_jobs) == 1:
                            try:
                                if counter != 0:
                                    current_time = int(time.time())
                                    starting_time = int(now.split('_')[0])
                                    log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                         params_info,
                                                                                                                         current_time - starting_time))
                                output = function(*args, **kwargs)
                                log_message('Task {0} finished successfully'.format(task_name))
                                return output
                            finally:
                                update_value(key=persistent_key,
                                             append=False,
                                             value_to_update={'kwargs': kwargs_dict})
                        counter += 1
                        time.sleep(1)
                        if counter == timeout:
                            update_value(key=persistent_key,
                                         append=False,
                                         value_to_update={'kwargs': kwargs_dict})
                            log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                        level='error')
                            raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode, now, task_name, timeout))
            elif mode == 'CHAINED':
                if extra_task_names is not None:
                    log_message('Extra tasks are not allowed in this mode',
                                level='error')
                    raise ValueError('Ensure single {0} mode - ID {1} - Extra tasks are not allowed in this mode'.format(mode, now))

                # Create key to be stored in arakoon and update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key, append=True)

                # Validate whether another job with same params is being executed, skip if so
                for item in value['values'][1:]:  # 1st element is processing job, we check all other queued jobs for identical params
                    if item['kwargs'] == kwargs_dict:
                        log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                        return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict,
                                              'timestamp': now})

                # Poll the arakoon to see whether this call is the first in the list; if so, execute, else wait
                first_element = None
                counter = 0
                while first_element != now and counter < timeout:
                    if persistent_client.exists(persistent_key):
                        value = persistent_client.get(persistent_key)
                        first_element = value['values'][0]['timestamp']

                    if first_element == now:
                        try:
                            if counter != 0:
                                current_time = int(time.time())
                                starting_time = int(now.split('_')[0])
                                log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                     params_info,
                                                                                                                     current_time - starting_time))
                            output = function(*args, **kwargs)
                            log_message('Task {0} finished successfully'.format(task_name))
                        finally:
                            update_value(key=persistent_key, append=False)
                        return output
                    counter += 1
                    time.sleep(1)
                    if counter == timeout:
                        update_value(key=persistent_key, append=False)
                        log_message('Could not start task {0} {1}, within expected time ({2}s). Removed it from queue'.format(task_name, params_info, timeout),
                                    level='error')
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode, now, task_name, timeout))
            else:
                raise ValueError('Unsupported mode "{0}" provided'.format(mode))
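
The two queued modes above differ only in when a waiting call may start: DEDUPED starts once it is the only queued entry with its parameters, CHAINED once it sits at the head of the queue. A minimal sketch of the two predicates, operating on the values list stored under the persistent key:

def may_start_deduped(values, kwargs_dict):
    # Start when this call is the only queued job with identical params
    return len([v for v in values if v['kwargs'] == kwargs_dict]) == 1

def may_start_chained(values, timestamp):
    # Start when this call sits at the head of the queue
    return len(values) > 0 and values[0]['timestamp'] == timestamp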
Example #42
        def new_function(*args, **kwargs):
            """
            Wrapped function
            :param args: Arguments without default values
            :param kwargs: Arguments with default values
            """
            def log_message(message, level='info'):
                """
                Log a message with some additional information
                :param message: Message to log
                :param level:   Log level
                :return:        None
                """
                if level not in ('info', 'warning', 'debug', 'error', 'exception'):
                    raise ValueError('Unsupported log level "{0}" specified'.format(level))
                complete_message = 'Ensure single {0} mode - ID {1} - {2}'.format(mode, now, message)
                getattr(logger, level)(complete_message)

            def update_value(key, append, value_to_update=None):
                """
                Store the specified value in the PersistentFactory
                :param key:             Key to store the value for
                :param append:          If True, append value_to_update to the list; if False, remove value_to_update (or pop the element at index 0 when no value is given)
                :param value_to_update: Value to append to the list or remove from the list
                :return:                Updated value
                """
                with volatile_mutex(name=key, wait=5):
                    if persistent_client.exists(key):
                        val = persistent_client.get(key)
                        if append is True and value_to_update is not None:
                            val['values'].append(value_to_update)
                        elif append is False and value_to_update is not None:
                            for value_item in val['values']:
                                if value_item == value_to_update:
                                    val['values'].remove(value_item)
                                    break
                        elif append is False and len(val['values']) > 0:
                            val['values'].pop(0)
                        log_message('Amount of jobs pending for key {0}: {1}'.format(key, len(val['values'])))
                        for kwarg in val['values']:
                            log_message('  KWARGS: {0}'.format(kwarg['kwargs']))
                    else:
                        log_message('Setting initial value for key {0}'.format(key))
                        val = {'mode': mode,
                               'values': []}
                    persistent_client.set(key, val)
                return val

            now = '{0}_{1}'.format(int(time.time()), ''.join(random.choice(string.ascii_letters + string.digits) for _ in range(10)))
            task_names = [task_name] if extra_task_names is None else [task_name] + extra_task_names
            persistent_key = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task_name)
            persistent_client = PersistentFactory.get_client()

            if mode == 'DEFAULT':
                with volatile_mutex(persistent_key, wait=5):
                    for task in task_names:
                        key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                        if persistent_client.exists(key_to_check):
                            log_message('Execution of task {0} discarded'.format(task_name))
                            return None
                    log_message('Setting key {0}'.format(persistent_key))
                    persistent_client.set(persistent_key, {'mode': mode})

                try:
                    output = function(*args, **kwargs)
                    log_message('Task {0} finished successfully'.format(task_name))
                    return output
                finally:
                    with volatile_mutex(persistent_key, wait=5):
                        if persistent_client.exists(persistent_key):
                            log_message('Deleting key {0}'.format(persistent_key))
                            persistent_client.delete(persistent_key)
            elif mode == 'DEDUPED':
                with volatile_mutex(persistent_key, wait=5):
                    if extra_task_names is not None:
                        for task in extra_task_names:
                            key_to_check = '{0}_{1}'.format(ENSURE_SINGLE_KEY, task)
                            if persistent_client.exists(key_to_check):
                                log_message('Execution of task {0} discarded'.format(task_name))
                                return None
                    log_message('Setting key {0}'.format(persistent_key))

                # Update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed
                job_counter = 0
                for item in value['values']:
                    if item['kwargs'] == kwargs_dict:
                        job_counter += 1
                        if job_counter == 2:  # 1st job with same params is being executed, 2nd is scheduled for execution ==> Discard current
                            log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                            return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict})

                # Poll arakoon to see whether this call is the only one in the list; if so --> execute, else wait
                counter = 0
                while counter < timeout:
                    if persistent_client.exists(persistent_key):
                        values = persistent_client.get(persistent_key)['values']
                        queued_jobs = [v for v in values if v['kwargs'] == kwargs_dict]
                        if len(queued_jobs) == 1:
                            try:
                                if counter != 0:
                                    current_time = int(time.time())
                                    starting_time = int(now.split('_')[0])
                                    log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                         params_info,
                                                                                                                         current_time - starting_time))
                                output = function(*args, **kwargs)
                                log_message('Task {0} finished successfully'.format(task_name))
                                return output
                            finally:
                                update_value(key=persistent_key,
                                             append=False,
                                             value_to_update={'kwargs': kwargs_dict})
                        counter += 1
                        time.sleep(1)
                        if counter == timeout:
                            update_value(key=persistent_key,
                                         append=False,
                                         value_to_update={'kwargs': kwargs_dict})
                            log_message('Could not start task {0} {1} within the expected time ({2}s). Removed it from the queue'.format(task_name, params_info, timeout),
                                        level='error')
                            raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                             now,
                                                                                                                                                             task_name,
                                                                                                                                                             timeout))
            elif mode == 'CHAINED':
                if extra_task_names is not None:
                    log_message('Extra tasks are not allowed in this mode',
                                level='error')
                    raise ValueError('Ensure single {0} mode - ID {1} - Extra tasks are not allowed in this mode'.format(mode, now))

                # Create key to be stored in arakoon and update kwargs with args
                timeout = kwargs.pop('ensure_single_timeout') if 'ensure_single_timeout' in kwargs else global_timeout
                function_info = inspect.getargspec(function)
                kwargs_dict = {}
                for index, arg in enumerate(args):
                    kwargs_dict[function_info.args[index]] = arg
                kwargs_dict.update(kwargs)
                params_info = 'with params {0}'.format(kwargs_dict) if kwargs_dict else 'with default params'

                # Set the key in arakoon if non-existent
                value = update_value(key=persistent_key,
                                     append=True)

                # Validate whether another job with same params is being executed, skip if so
                for item in value['values'][1:]:  # 1st element is processing job, we check all other queued jobs for identical params
                    if item['kwargs'] == kwargs_dict:
                        log_message('Execution of task {0} {1} discarded because of identical parameters'.format(task_name, params_info))
                        return None
                log_message('New task {0} {1} scheduled for execution'.format(task_name, params_info))
                update_value(key=persistent_key,
                             append=True,
                             value_to_update={'kwargs': kwargs_dict,
                                              'timestamp': now})

                # Poll arakoon to see whether this call is the first one in the list; if so --> execute, else wait
                first_element = None
                counter = 0
                while counter < timeout:
                    if persistent_client.exists(persistent_key):
                        value = persistent_client.get(persistent_key)
                        first_element = value['values'][0]['timestamp'] if len(value['values']) > 0 else None

                    if first_element == now:
                        output = None
                        try:
                            if counter != 0:
                                current_time = int(time.time())
                                starting_time = int(now.split('_')[0])
                                log_message('Task {0} {1} had to wait {2} seconds before being able to start'.format(task_name,
                                                                                                                     params_info,
                                                                                                                     current_time - starting_time))
                            output = function(*args, **kwargs)
                            log_message('Task {0} finished successfully'.format(task_name))
                        except Exception:
                            log_message('Task {0} {1} failed'.format(task_name, params_info), level='exception')
                            raise
                        finally:
                            update_value(key=persistent_key,
                                         append=False)
                        return output
                    counter += 1
                    time.sleep(1)
                    if counter == timeout:
                        update_value(key=persistent_key,
                                     append=False)
                        log_message('Could not start task {0} {1} within the expected time ({2}s). Removed it from the queue'.format(task_name, params_info, timeout),
                                    level='error')
                        raise EnsureSingleTimeoutReached('Ensure single {0} mode - ID {1} - Task {2} could not be started within timeout of {3}s'.format(mode,
                                                                                                                                                         now,
                                                                                                                                                         task_name,
                                                                                                                                                         timeout))
            else:
                raise ValueError('Unsupported mode "{0}" provided'.format(mode))
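
For reference, a minimal caller-side sketch of the three modes, assuming this block is the body of an ensure_single(task_name, mode, extra_task_names=None) decorator around Celery tasks (the decorator plumbing is omitted, all names are illustrative, and calls are assumed to arrive concurrently): DEFAULT discards any call while the task or one of its extra_task_names holds the key, DEDUPED allows one running plus one queued job per distinct set of kwargs, and CHAINED queues every call and runs them in arrival order, discarding a call whose kwargs match an already-queued job.

@ensure_single(task_name='ovs.demo.rebalance', mode='DEDUPED')  # hypothetical task name
def rebalance(vpool_guid):
    pass

rebalance('guid-1')  # queue empty: executes immediately
rebalance('guid-1')  # identical job running: queued and polls until it may start
rebalance('guid-1')  # identical job running AND queued: discarded, returns None
rebalance('guid-2')  # different kwargs: queued/executed independently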
Example #43
    def create_cluster(cluster_name, cluster_type, ip, base_dir, plugins=None, locked=True, internal=True, claim=False):
        """
        Always creates a cluster but marks its usage according to the internal flag
        :param cluster_name: Name of the cluster
        :type cluster_name: str

        :param cluster_type: Type of the cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str

        :param ip: IP address of the first node of the new cluster
        :type ip: str

        :param base_dir: Base directory that should contain the data and tlogs
        :type base_dir: str

        :param plugins: Plugins that should be added to the configuration file
        :type plugins: list

        :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool

        :param internal: Is cluster internally managed by OVS
        :type internal: bool

        :param claim: Claim the cluster right away
        :type claim: bool

        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Cluster type {0} is not supported. Please choose from {1}'.format(cluster_type, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))

        if EtcdConfiguration.dir_exists('/ovs/arakoon/{0}'.format(cluster_name)):
            raise ValueError('An Arakoon cluster with name "{0}" already exists'.format(cluster_name))

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))
        base_dir = base_dir.rstrip('/')

        client = SSHClient(ip, username=ArakoonInstaller.SSHCLIENT_USER)
        if ArakoonInstaller.is_running(cluster_name, client):
            ArakoonInstaller._logger.info('Arakoon service running for cluster {0}'.format(cluster_name))
            config = ArakoonClusterConfig(cluster_name, plugins)
            config.load_config()
            for node in config.nodes:
                if node.ip == ip:
                    return {'client_port': node.client_port,
                            'messaging_port': node.messaging_port}

        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        log_dir = ArakoonInstaller.ARAKOON_LOG_DIR.format(cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(ip, {log_dir: True,
                                                          home_dir: False,
                                                          tlog_dir: False})

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(ip))
                port_mutex.acquire(wait=60)
            ports = ArakoonInstaller._get_free_ports(client)
            config = ArakoonClusterConfig(cluster_name, plugins)
            config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                  ip=ip,
                                                  client_port=ports[0],
                                                  messaging_port=ports[1],
                                                  log_dir=log_dir,
                                                  home=home_dir,
                                                  tlog_dir=tlog_dir))
            ArakoonInstaller._deploy(config)

            metadata = ArakoonClusterMetadata(cluster_id=cluster_name)
            metadata.internal = internal
            metadata.cluster_type = cluster_type.upper()
            metadata.write()
            if claim is True:
                metadata.claim()
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
        return {'metadata': metadata,
                'client_port': ports[0],
                'messaging_port': ports[1]}
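
A hypothetical invocation of the variant above (IP, directory and cluster name are illustrative; 'FWK' is assumed to be one of ServiceType.ARAKOON_CLUSTER_TYPES):

ports = ArakoonInstaller.create_cluster(cluster_name='ovsdb',
                                        cluster_type='FWK',
                                        ip='10.100.1.10',
                                        base_dir='/opt/OpenvStorage/db',
                                        claim=True)
# ports holds 'metadata' plus the 'client_port' and 'messaging_port' of the new node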
Example #44
    def update_from_voldrv(name, storagedriver_id):
        """
        This method will update/create a vmachine based on a given vmx/xml file
        :param name: Name of the vmx/xml file
        :param storagedriver_id: Storage Driver hosting the vmachine
        """

        pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
        if pmachine.hvtype not in ['VMWARE', 'KVM']:
            return

        hypervisor = Factory.get(pmachine)
        name = hypervisor.clean_vmachine_filename(name)
        storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
        vpool = storagedriver.vpool
        machine_ids = [storagedriver.storagerouter.machine_id for storagedriver in vpool.storagedrivers]

        if hypervisor.should_process(name, machine_ids=machine_ids):
            if pmachine.hvtype == 'VMWARE':
                storagedriver = StorageDriverList.get_by_storagedriver_id(storagedriver_id)
                vpool = storagedriver.vpool
            else:
                vpool = None
            pmachine = PMachineList.get_by_storagedriver_id(storagedriver_id)
            mutex = volatile_mutex('{0}_{1}'.format(name, vpool.guid if vpool is not None else 'none'))
            try:
                mutex.acquire(wait=120)
                limit = 5
                exists = hypervisor.file_exists(storagedriver, name)
                while limit > 0 and exists is False:
                    time.sleep(1)
                    exists = hypervisor.file_exists(storagedriver, name)
                    limit -= 1
                if exists is False:
                    VMachineController._logger.info('Could not locate vmachine with name {0} on vpool {1}'.format(name, vpool.name if vpool is not None else 'none'))
                    vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
                    if vmachine is not None:
                        VMachineController.delete_from_voldrv(name, storagedriver_id=storagedriver_id)
                    return
            finally:
                mutex.release()
            try:
                mutex.acquire(wait=5)
                vmachine = VMachineList.get_by_devicename_and_vpool(name, vpool)
                if not vmachine:
                    vmachines = VMachineList.get_vmachine_by_name(name)
                    if vmachines is not None:
                        vmachine = vmachines[0]
                if not vmachine:
                    vmachine = VMachine()
                    vmachine.vpool = vpool
                    vmachine.pmachine = pmachine
                    vmachine.status = 'CREATED'
                vmachine.devicename = name
                vmachine.save()
            finally:
                mutex.release()

            if pmachine.hvtype == 'KVM':
                try:
                    mutex.acquire(wait=120)
                    VMachineController.sync_with_hypervisor(vmachine.guid, storagedriver_id=storagedriver_id)
                    vmachine.status = 'SYNC'
                except:
                    vmachine.status = 'SYNC_NOK'
                finally:
                    mutex.release()
                vmachine.save()
        else:
            VMachineController._logger.info('Ignored invalid file {0}'.format(name))
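
The vmachine examples in this listing pair acquire(wait=...) with release() in a finally block, while other examples use volatile_mutex as a context manager. A small sketch of both styles; the timeout behaviour of acquire() is an assumption based on how the callers use it:

mutex = volatile_mutex('some_key')
try:
    mutex.acquire(wait=120)  # block up to 120 seconds; presumably raises when the lock cannot be obtained
    pass                     # critical section
finally:
    mutex.release()          # callers rely on release() being safe even when acquire() failed

with volatile_mutex('some_key', wait=120):  # context-manager style: acquire/release handled implicitly
    pass                                    # critical section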
Example #45
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with a given vMachine configuration
        :param vmachine: Virtual Machine to update
        :param vm_object: New virtual machine info
        :param pmachine: Physical machine of the virtual machine
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = dict([('{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint), storagedriver) for storagedriver in storagedrivers])
            vdisk_guids = []
            mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
            for disk in vm_object['disks']:
                ensure_safety = False
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        try:
                            mutex.acquire(wait=10)
                            vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                            if vdisk is None:
                                # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                                vdisk = VDisk()
                                vdisk.vpool = datastores[datastore].vpool
                                vdisk.reload_client()
                                vdisk.devicename = disk['filename']
                                vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                                vdisk.size = vdisk.info['volume_size']
                                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                                # Create the disk in a locked context, but don't execute long running-task in same context
                                vdisk.save()
                                ensure_safety = True
                        finally:
                            mutex.release()
                        if ensure_safety:
                            MDSServiceController.ensure_safety(vdisk)
                            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            VMachineController._logger.info('Error during vMachine update: {0}'.format(str(ex)))
            raise
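
For reference, the shape of vm_object consumed above, reconstructed from the key accesses in this example (all values are illustrative and additional keys may exist):

vm_object = {'id': 'vm-1001',                                          # hypervisor ID
             'name': 'demo-vm',
             'backing': {'filename': 'demo-vm/demo-vm.vmx'},
             'datastores': {'datastore1': '10.100.1.10:/mnt/vpool1'},  # '<storage_ip>:<mountpoint>'
             'disks': [{'datastore': 'datastore1',
                        'name': 'disk-0',
                        'order': 0,
                        'filename': 'demo-vm/disk-0.vmdk',
                        'backingfilename': 'demo-vm/disk-0-flat.vmdk'}]}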
Example #47
    def extend_cluster(master_ip, new_ip, cluster_name, base_dir, locked=True, filesystem=False, ports=None):
        """
        Extends a cluster to a given new node
        :param master_ip: IP of one of the already existing nodes
        :type master_ip: str
        :param new_ip: IP address of the node to be added
        :type new_ip: str
        :param cluster_name: Name of the cluster to be extended
        :type cluster_name: str
        :param base_dir: Base directory that will hold the db and tlogs
        :type base_dir: str
        :param locked: Indicates whether the extend should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool
        :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
        :type filesystem: bool
        :param ports: A list of ports to be used for this cluster's node
        :type ports: list
        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2}'.format(cluster_name, master_ip, new_ip))
        base_dir = base_dir.rstrip('/')

        config = ArakoonClusterConfig(cluster_name, filesystem)
        config.load_config(master_ip)
        cluster_type = ArakoonInstaller.get_arakoon_metadata_by_cluster_name(cluster_name=config.cluster_id, filesystem=filesystem, ip=master_ip)['cluster_type']

        client = SSHClient(new_ip, username=ArakoonInstaller.SSHCLIENT_USER)
        node_name = System.get_my_machine_id(client)

        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(new_ip, [home_dir, tlog_dir])

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(new_ip))
                port_mutex.acquire(wait=60)
            if ports is None:
                ports = ArakoonInstaller._get_free_ports(client)
            if node_name not in [node.name for node in config.nodes]:
                config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                      ip=new_ip,
                                                      client_port=ports[0],
                                                      messaging_port=ports[1],
                                                      log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                      crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                      home=home_dir,
                                                      tlog_dir=tlog_dir))
            service_metadata = ArakoonInstaller._deploy(config=config,
                                                        filesystem=filesystem,
                                                        delay_service_registration=cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG)[new_ip]
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Extending cluster {0} from {1} to {2} completed'.format(cluster_name, master_ip, new_ip))
        return {'client_port': ports[0],
                'messaging_port': ports[1],
                'service_metadata': service_metadata,
                'ips': [node.ip for node in config.nodes]}
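
A hypothetical follow-up to create_cluster: extending the cluster to a second node (IPs and names are illustrative):

info = ArakoonInstaller.extend_cluster(master_ip='10.100.1.10',
                                       new_ip='10.100.1.11',
                                       cluster_name='ovsdb',
                                       base_dir='/opt/OpenvStorage/db')
# info holds 'client_port', 'messaging_port', 'service_metadata' and the full 'ips' list of the cluster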
Example #48
    def _deploy_stack_and_scrub(queue, vpool, scrub_info, error_messages):
        """
        Executes scrub work for a given vDisk queue and vPool, based on scrub_info
        :param queue: a Queue with vDisk guids that need to be scrubbed (they should only be member of a single vPool)
        :type queue: Queue
        :param vpool: the vPool object of the vDisks
        :type vpool: VPool
        :param scrub_info: A dict containing scrub information:
                           `scrub_path` with the path where to scrub
                           `storage_router` with the StorageRouter that needs to do the work
                           `partition_guid` with the guid of the partition on which the scrub work runs
        :param error_messages: A list of error messages to be filled (by reference)
        :type error_messages: list
        :return: None
        :rtype: NoneType
        """
        if len(vpool.storagedrivers) == 0 or not vpool.storagedrivers[0].storagedriver_id:
            error_messages.append('vPool {0} does not have any valid StorageDrivers configured'.format(vpool.name))
            return

        service_manager = ServiceFactory.get_manager()
        client = None
        lock_time = 5 * 60
        storagerouter = scrub_info['storage_router']
        partition_guid = scrub_info['partition_guid']
        alba_proxy_service = 'ovs-albaproxy_{0}_{1}_{2}_scrub'.format(vpool.name, storagerouter.name, partition_guid)
        scrub_directory = '{0}/scrub_work_{1}_{2}'.format(scrub_info['scrub_path'], vpool.name, partition_guid)
        scrub_config_key = 'ovs/vpools/{0}/proxies/scrub/scrub_config_{1}'.format(vpool.guid, partition_guid)
        backend_config_key = 'ovs/vpools/{0}/proxies/scrub/backend_config_{1}'.format(vpool.guid, partition_guid)

        # Deploy a proxy
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_create(scrub_directory)
                client.dir_chmod(scrub_directory, 0777)  # Celery task executed by 'ovs' user and should be able to write in it
                if service_manager.has_service(name=alba_proxy_service, client=client) is True and service_manager.get_service_status(name=alba_proxy_service, client=client) == 'active':
                    GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Re-using existing proxy service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                    scrub_config = Configuration.get(scrub_config_key)
                else:
                    machine_id = System.get_my_machine_id(client)
                    port_range = Configuration.get('/ovs/framework/hosts/{0}/ports|storagedriver'.format(machine_id))
                    with volatile_mutex('deploy_proxy_for_scrub_{0}'.format(storagerouter.guid), wait=30):
                        port = System.get_free_ports(selected_range=port_range, nr=1, client=client)[0]
                    scrub_config = Configuration.get('ovs/vpools/{0}/proxies/scrub/generic_scrub'.format(vpool.guid))
                    scrub_config['port'] = port
                    scrub_config['transport'] = 'tcp'
                    Configuration.set(scrub_config_key, json.dumps(scrub_config, indent=4), raw=True)

                    params = {'VPOOL_NAME': vpool.name,
                              'LOG_SINK': LogHandler.get_sink_path(alba_proxy_service),
                              'CONFIG_PATH': Configuration.get_configuration_path(scrub_config_key)}
                    service_manager.add_service(name='ovs-albaproxy', params=params, client=client, target_name=alba_proxy_service)
                    service_manager.start_service(name=alba_proxy_service, client=client)
                    GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Deployed ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))

                backend_config = Configuration.get('ovs/vpools/{0}/hosts/{1}/config'.format(vpool.guid, vpool.storagedrivers[0].storagedriver_id))['backend_connection_manager']
                if backend_config.get('backend_type') != 'MULTI':
                    backend_config['alba_connection_host'] = '127.0.0.1'
                    backend_config['alba_connection_port'] = scrub_config['port']
                else:
                    for value in backend_config.itervalues():
                        if isinstance(value, dict):
                            value['alba_connection_host'] = '127.0.0.1'
                            value['alba_connection_port'] = scrub_config['port']
                # Copy backend connection manager information into a separate key
                Configuration.set(backend_config_key, json.dumps({'backend_connection_manager': backend_config}, indent=4), raw=True)
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - An error occurred deploying ALBA proxy {2}'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            GenericController._logger.exception(message)
            if client is not None and service_manager.has_service(name=alba_proxy_service, client=client) is True:
                if service_manager.get_service_status(name=alba_proxy_service, client=client) == 'active':
                    service_manager.stop_service(name=alba_proxy_service, client=client)
                service_manager.remove_service(name=alba_proxy_service, client=client)
            if Configuration.exists(scrub_config_key):
                Configuration.delete(scrub_config_key)

        # Execute the actual scrubbing
        threads = []
        threads_key = '/ovs/framework/hosts/{0}/config|scrub_stack_threads'.format(storagerouter.machine_id)
        amount_threads = Configuration.get(key=threads_key) if Configuration.exists(key=threads_key) else 2
        if not isinstance(amount_threads, int):
            error_messages.append('Amount of threads to spawn must be an integer for StorageRouter with ID {0}'.format(storagerouter.machine_id))
            return

        amount_threads = max(amount_threads, 1)  # Make sure amount_threads is at least 1
        amount_threads = min(min(queue.qsize(), amount_threads), 20)  # Make sure amount_threads is at most 20
        GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Spawning {2} threads for proxy service {3}'.format(vpool.name, storagerouter.name, amount_threads, alba_proxy_service))
        for index in range(amount_threads):
            thread = Thread(name='execute_scrub_{0}_{1}_{2}'.format(vpool.guid, partition_guid, index),
                            target=GenericController._execute_scrub,
                            args=(queue, vpool, scrub_info, scrub_directory, error_messages))
            thread.start()
            threads.append(thread)
        for thread in threads:
            thread.join()

        # Delete the proxy again
        try:
            with file_mutex(name='ovs_albaproxy_scrub', wait=lock_time):
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removing service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
                client = SSHClient(storagerouter, 'root')
                client.dir_delete(scrub_directory)
                if service_manager.has_service(alba_proxy_service, client=client):
                    service_manager.stop_service(alba_proxy_service, client=client)
                    service_manager.remove_service(alba_proxy_service, client=client)
                if Configuration.exists(scrub_config_key):
                    Configuration.delete(scrub_config_key)
                GenericController._logger.info('Scrubber - vPool {0} - StorageRouter {1} - Removed service {2}'.format(vpool.name, storagerouter.name, alba_proxy_service))
        except Exception:
            message = 'Scrubber - vPool {0} - StorageRouter {1} - Removing service {2} failed'.format(vpool.name, storagerouter.name, alba_proxy_service)
            error_messages.append(message)
            GenericController._logger.exception(message)
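
A sketch of how a caller might drive _deploy_stack_and_scrub, assuming vpool, storagerouter and partition are model objects already in scope and using the scrub_info layout from the docstring (the Queue import matches the Python 2 idioms used in this example):

from Queue import Queue

error_messages = []
vdisk_queue = Queue()
for vdisk in vpool.vdisks:
    vdisk_queue.put(vdisk.guid)
scrub_info = {'scrub_path': '/mnt/hdd1/scrub',
              'storage_router': storagerouter,
              'partition_guid': partition.guid}
GenericController._deploy_stack_and_scrub(queue=vdisk_queue,
                                          vpool=vpool,
                                          scrub_info=scrub_info,
                                          error_messages=error_messages)
if error_messages:
    raise RuntimeError('Errors occurred while scrubbing:\n - {0}'.format('\n - '.join(error_messages)))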
Example #49
    def create_cluster(cluster_name, cluster_type, ip, base_dir, plugins=None, locked=True, internal=True, filesystem=False, ports=None):
        """
        Always creates a cluster but marks its usage according to the internal flag
        :param cluster_name: Name of the cluster
        :type cluster_name: str
        :param cluster_type: Type of the cluster (See ServiceType.ARAKOON_CLUSTER_TYPES)
        :type cluster_type: str
        :param ip: IP address of the first node of the new cluster
        :type ip: str
        :param base_dir: Base directory that should contain the data and tlogs
        :type base_dir: str
        :param plugins: Plugins that should be added to the configuration file
        :type plugins: dict
        :param locked: Indicates whether the create should run in a locked context (e.g. to prevent port conflicts)
        :type locked: bool
        :param internal: Is cluster internally managed by OVS
        :type internal: bool
        :param filesystem: Indicates whether the configuration should be on the filesystem or in a configuration cluster
        :type filesystem: bool
        :param ports: A list of ports to be used for this cluster's node
        :type ports: list
        :return: Ports used by arakoon cluster
        :rtype: dict
        """
        if cluster_type not in ServiceType.ARAKOON_CLUSTER_TYPES:
            raise ValueError('Cluster type {0} is not supported. Please choose from {1}'.format(cluster_type, ', '.join(ServiceType.ARAKOON_CLUSTER_TYPES)))

        client = SSHClient(ip, username=ArakoonInstaller.SSHCLIENT_USER)
        if filesystem is True:
            exists = client.file_exists(ArakoonClusterConfig.CONFIG_FILE.format(cluster_name))
        else:
            exists = Configuration.dir_exists('/ovs/arakoon/{0}'.format(cluster_name))
        if exists is True:
            raise ValueError('An Arakoon cluster with name "{0}" already exists'.format(cluster_name))

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1}'.format(cluster_name, ip))

        node_name = System.get_my_machine_id(client)
        base_dir = base_dir.rstrip('/')
        home_dir = ArakoonInstaller.ARAKOON_HOME_DIR.format(base_dir, cluster_name)
        tlog_dir = ArakoonInstaller.ARAKOON_TLOG_DIR.format(base_dir, cluster_name)
        ArakoonInstaller.clean_leftover_arakoon_data(ip, [home_dir, tlog_dir])

        port_mutex = None
        try:
            if locked is True:
                from ovs.extensions.generic.volatilemutex import volatile_mutex
                port_mutex = volatile_mutex('arakoon_install_ports_{0}'.format(ip))
                port_mutex.acquire(wait=60)
            if ports is None:
                ports = ArakoonInstaller._get_free_ports(client)
            config = ArakoonClusterConfig(cluster_name, filesystem, plugins.keys() if plugins is not None else None)
            config.nodes.append(ArakoonNodeConfig(name=node_name,
                                                  ip=ip,
                                                  client_port=ports[0],
                                                  messaging_port=ports[1],
                                                  log_sinks=LogHandler.get_sink_path('arakoon_server'),
                                                  crash_log_sinks=LogHandler.get_sink_path('arakoon_server_crash'),
                                                  home=home_dir,
                                                  tlog_dir=tlog_dir))
            metadata = {'internal': internal,
                        'cluster_name': cluster_name,
                        'cluster_type': cluster_type.upper(),
                        'in_use': False}
            service_metadata = ArakoonInstaller._deploy(config=config,
                                                        filesystem=filesystem,
                                                        plugins=plugins.values() if plugins is not None else None,
                                                        delay_service_registration=cluster_type == ServiceType.ARAKOON_CLUSTER_TYPES.CFG)[ip]
        finally:
            if port_mutex is not None:
                port_mutex.release()

        ArakoonInstaller._logger.debug('Creating cluster {0} on {1} completed'.format(cluster_name, ip))
        return {'metadata': metadata,
                'client_port': ports[0],
                'messaging_port': ports[1],
                'service_metadata': service_metadata}
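
A hypothetical call to this variant for a configuration cluster stored on the filesystem ('CFG' is referenced above as ServiceType.ARAKOON_CLUSTER_TYPES.CFG; the plugins value semantics are an assumption based on the keys()/values() split in the code):

result = ArakoonInstaller.create_cluster(cluster_name='config',
                                         cluster_type='CFG',
                                         ip='10.100.1.10',
                                         base_dir='/opt/OpenvStorage/db',
                                         plugins=None,  # or e.g. {'my_plugin': <value passed through to _deploy>}
                                         filesystem=True)
# result holds 'metadata', 'client_port', 'messaging_port' and 'service_metadata'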
Example #50
    def add_vpool(cls, parameters):
        """
        Add a vPool to the machine this task is running on
        :param parameters: Parameters for vPool creation
        :type parameters: dict
        :return: None
        :rtype: NoneType
        """
        # TODO: Add logging
        cls._logger.debug('Adding vpool. Parameters: {0}'.format(parameters))
        # VALIDATIONS
        if not isinstance(parameters, dict):
            raise ValueError('Parameters passed to create a vPool should be of type dict')

        # Check StorageRouter existence
        storagerouter = StorageRouterList.get_by_ip(ip=parameters.get('storagerouter_ip'))
        if storagerouter is None:
            raise RuntimeError('Could not find StorageRouter')

        # Validate requested vPool configurations
        vp_installer = VPoolInstaller(name=parameters.get('vpool_name'))
        vp_installer.validate(storagerouter=storagerouter)

        # Validate requested StorageDriver configurations
        cls._logger.info('vPool {0}: Validating StorageDriver configurations'.format(vp_installer.name))
        sd_installer = StorageDriverInstaller(vp_installer=vp_installer,
                                              configurations={'storage_ip': parameters.get('storage_ip'),
                                                              'caching_info': parameters.get('caching_info'),
                                                              'backend_info': {'main': parameters.get('backend_info'),
                                                                               StorageDriverConfiguration.CACHE_BLOCK: parameters.get('backend_info_bc'),
                                                                               StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('backend_info_fc')},
                                                              'connection_info': {'main': parameters.get('connection_info'),
                                                                                  StorageDriverConfiguration.CACHE_BLOCK: parameters.get('connection_info_bc'),
                                                                                  StorageDriverConfiguration.CACHE_FRAGMENT: parameters.get('connection_info_fc')},
                                                              'sd_configuration': parameters.get('config_params')})

        partitions_mutex = volatile_mutex('add_vpool_partitions_{0}'.format(storagerouter.guid))
        try:
            # VPOOL CREATION
            # Create the vPool as soon as possible in the process to be displayed in the GUI (INSTALLING/EXTENDING state)
            if vp_installer.is_new is True:
                vp_installer.create(rdma_enabled=sd_installer.rdma_enabled)
                vp_installer.configure_mds(config=parameters.get('mds_config_params', {}))
            else:
                vp_installer.update_status(status=VPool.STATUSES.EXTENDING)

            # ADDITIONAL VALIDATIONS
            # Check StorageRouter connectivity
            cls._logger.info('vPool {0}: Validating StorageRouter connectivity'.format(vp_installer.name))
            linked_storagerouters = [storagerouter]
            if vp_installer.is_new is False:
                linked_storagerouters += [sd.storagerouter for sd in vp_installer.vpool.storagedrivers]

            sr_client_map = SSHClient.get_clients(endpoints=linked_storagerouters, user_names=['ovs', 'root'])
            offline_nodes = sr_client_map.pop('offline')
            if storagerouter in offline_nodes:
                raise RuntimeError('Node on which the vPool is being {0} is not reachable'.format('created' if vp_installer.is_new is True else 'extended'))

            sr_installer = StorageRouterInstaller(root_client=sr_client_map[storagerouter]['root'],
                                                  sd_installer=sd_installer,
                                                  vp_installer=vp_installer,
                                                  storagerouter=storagerouter)

            # When 2 or more jobs simultaneously run on the same StorageRouter, we need to check and create the StorageDriver partitions in locked context
            partitions_mutex.acquire(wait=60)
            sr_installer.partition_info = StorageRouterController.get_partition_info(storagerouter_guid=storagerouter.guid)
            sr_installer.validate_vpool_extendable()
            sr_installer.validate_global_write_buffer(requested_size=parameters.get('writecache_size', 0))
            sr_installer.validate_local_cache_size(requested_proxies=parameters.get('parallelism', {}).get('proxies', 2))

            # MODEL STORAGEDRIVER AND PARTITION JUNCTIONS
            sd_installer.create()
            sd_installer.create_partitions()
            partitions_mutex.release()

            vp_installer.refresh_metadata()
        except Exception:
            cls._logger.exception('Something went wrong during the validation or modeling of vPool {0} on StorageRouter {1}'.format(vp_installer.name, storagerouter.name))
            partitions_mutex.release()
            vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            raise

        # Arakoon setup
        counter = 0
        while counter < 300:
            try:
                if StorageDriverController.manual_voldrv_arakoon_checkup() is True:
                    break
            except Exception:
                cls._logger.exception('Arakoon checkup for voldrv cluster failed')
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise
            counter += 1
            time.sleep(1)
            if counter == 300:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
                raise RuntimeError('Arakoon checkup for the StorageDriver cluster could not be started')

        # Cluster registry
        try:
            vp_installer.configure_cluster_registry(allow_raise=True)
        except Exception:
            if vp_installer.is_new is True:
                vp_installer.revert_vpool(status=VPool.STATUSES.RUNNING)
            else:
                vp_installer.revert_vpool(status=VPool.STATUSES.FAILURE)
            raise

        try:
            sd_installer.setup_proxy_configs()
            sd_installer.configure_storagedriver_service()
            DiskController.sync_with_reality(storagerouter.guid)
            MDSServiceController.prepare_mds_service(storagerouter=storagerouter, vpool=vp_installer.vpool)

            # Update the MDS safety if changed via API (vpool.configuration will be available at this point also for the newly added StorageDriver)
            vp_installer.vpool.invalidate_dynamics('configuration')
            if vp_installer.mds_safety is not None and vp_installer.vpool.configuration['mds_config']['mds_safety'] != vp_installer.mds_safety:
                Configuration.set(key='/ovs/vpools/{0}/mds_config|mds_safety'.format(vp_installer.vpool.guid), value=vp_installer.mds_safety)

            sd_installer.start_services()  # Create and start watcher volumedriver, DTL, proxies and StorageDriver services

            # Post creation/extension checkups
            mds_config_set = MDSServiceController.get_mds_storagedriver_config_set(vpool=vp_installer.vpool, offline_nodes=offline_nodes)
            for sr, clients in sr_client_map.iteritems():
                for current_storagedriver in [sd for sd in sr.storagedrivers if sd.vpool_guid == vp_installer.vpool.guid]:
                    storagedriver_config = StorageDriverConfiguration(vpool_guid=vp_installer.vpool.guid,
                                                                      storagedriver_id=current_storagedriver.storagedriver_id)
                    if storagedriver_config.config_missing is False:
                        # The filesystem section in the StorageDriver configuration holds all parameters used for vDisks created directly on the filesystem
                        # So when a vDisk gets created on the filesystem, these MDSes will be assigned to it
                        storagedriver_config.configure_filesystem(fs_metadata_backend_mds_nodes=mds_config_set[sr.guid])
                        storagedriver_config.save(client=clients['ovs'])

            # Everything's reconfigured, refresh new cluster configuration
            for current_storagedriver in vp_installer.vpool.storagedrivers:
                if current_storagedriver.storagerouter not in sr_client_map:
                    continue
                vp_installer.vpool.storagedriver_client.update_cluster_node_configs(str(current_storagedriver.storagedriver_id), req_timeout_secs=10)
        except Exception:
            cls._logger.exception('vPool {0}: Creation failed'.format(vp_installer.name))
            vp_installer.update_status(status=VPool.STATUSES.FAILURE)
            raise

        # When a node is offline, we can run into errors, but also when 1 or more volumes are not running
        # These are scheduled tasks anyway, so we don't really care whether they succeed here or not
        try:
            VDiskController.dtl_checkup(vpool_guid=vp_installer.vpool.guid, ensure_single_timeout=600)
        except:
            pass
        for vdisk in vp_installer.vpool.vdisks:
            try:
                MDSServiceController.ensure_safety(vdisk_guid=vdisk.guid)
            except:
                pass
        vp_installer.update_status(status=VPool.STATUSES.RUNNING)
        cls._logger.info('Add vPool {0} ended successfully'.format(vp_installer.name))
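
A minimal parameters dict for add_vpool, reconstructed from the .get() calls above; omitted keys (caching_info, backend_info_bc/_fc, connection_info_bc/_fc, mds_config_params, parallelism) default to None or {} in the code, and all concrete values are illustrative:

parameters = {'vpool_name': 'vpool1',
              'storagerouter_ip': '10.100.1.10',
              'storage_ip': '10.100.1.10',
              'backend_info': {'alba_backend_guid': 'abcd-0001', 'preset': 'default'},      # illustrative contents
              'connection_info': {'host': '10.100.1.20', 'port': 443, 'client_id': 'ovs'},  # illustrative contents
              'config_params': {'dtl_mode': 'a_sync'},                                      # illustrative contents
              'writecache_size': 10}
StorageRouterController.add_vpool(parameters)  # assuming the surrounding class is StorageRouterController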
Example #51
    def clone(machineguid, timestamp, name):
        """
        Clone a vmachine using the disk snapshot based on a snapshot timestamp

        @param machineguid: guid of the machine to clone
        @param timestamp: timestamp of the disk snapshots to use for the clone
        @param name: name for the new machine
        """
        machine = VMachine(machineguid)
        timestamp = str(timestamp)
        if timestamp not in (snap['timestamp'] for snap in machine.snapshots):
            raise RuntimeError('Invalid timestamp provided, not a valid snapshot of this vmachine.')

        vpool = None
        storagerouter = None
        if machine.pmachine is not None and machine.pmachine.hvtype == 'VMWARE':
            for vdisk in machine.vdisks:
                if vdisk.vpool is not None:
                    vpool = vdisk.vpool
                    break
        for vdisk in machine.vdisks:
            if vdisk.storagerouter_guid:
                storagerouter = StorageRouter(vdisk.storagerouter_guid)
                break
        hv = Factory.get(machine.pmachine)
        vm_path = hv.get_vmachine_path(name, storagerouter.machine_id if storagerouter is not None else '')
        # mutex in sync_with_hypervisor uses "None" for KVM hvtype
        mutex = volatile_mutex('{0}_{1}'.format(hv.clean_vmachine_filename(vm_path), vpool.guid if vpool is not None else 'none'))

        disks = {}
        for snapshot in machine.snapshots:
            if snapshot['timestamp'] == timestamp:
                for diskguid, snapshotguid in snapshot['snapshots'].iteritems():
                    disks[diskguid] = snapshotguid

        try:
            mutex.acquire(wait=120)
            new_machine = VMachine()
            new_machine.copy(machine)
            new_machine.name = name
            new_machine.devicename = hv.clean_vmachine_filename(vm_path)
            new_machine.pmachine = machine.pmachine
            new_machine.save()
        finally:
            mutex.release()

        new_disk_guids = []
        vm_disks = []
        mountpoint = None
        disks_by_order = sorted(machine.vdisks, key=lambda x: x.order)
        try:
            for current_disk in disks_by_order:
                if machine.is_vtemplate and current_disk.templatesnapshot:
                    snapshotid = current_disk.templatesnapshot
                else:
                    snapshotid = disks[current_disk.guid]
                prefix = '{0}-clone'.format(current_disk.name)

                result = VDiskController.clone(diskguid=current_disk.guid,
                                               snapshotid=snapshotid,
                                               devicename=prefix,
                                               pmachineguid=new_machine.pmachine_guid,
                                               machinename=new_machine.name,
                                               machineguid=new_machine.guid)
                new_disk_guids.append(result['diskguid'])
                mountpoint = StorageDriverList.get_by_storagedriver_id(current_disk.storagedriver_id).mountpoint
                vm_disks.append(result)
        except Exception as ex:
            VMachineController._logger.error('Failed to clone disks. {0}'.format(ex))
            VMachineController.delete(machineguid=new_machine.guid)
            raise

        try:
            result = hv.clone_vm(machine.hypervisor_id, name, vm_disks, mountpoint)
        except Exception as ex:
            VMachineController._logger.error('Failed to clone vm. {0}'.format(ex))
            VMachineController.delete(machineguid=new_machine.guid)
            raise

        try:
            mutex.acquire(wait=120)
            new_machine.hypervisor_id = result
            new_machine.save()
        finally:
            mutex.release()
        return new_machine.guid
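Example #51 acquires and releases the same volatile_mutex twice through try/finally blocks. Assuming only the acquire(wait=...)/release() interface shown in these examples, a small context-manager wrapper could remove that boilerplate; this is a sketch, not part of the volatile_mutex API:

from contextlib import contextmanager

@contextmanager
def locked(mutex, wait=120):
    # Hold the given volatile_mutex for the duration of a with-block.
    mutex.acquire(wait=wait)
    try:
        yield mutex
    finally:
        mutex.release()

Both save operations in the clone flow would then shrink to: with locked(mutex): new_machine.save().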
Example #52
    def update_vmachine_config(vmachine, vm_object, pmachine=None):
        """
        Update a vMachine configuration with a given vMachine configuration
        :param vmachine: Virtual Machine to update
        :param vm_object: New virtual machine info
        :param pmachine: Physical machine of the virtual machine
        """
        try:
            vdisks_synced = 0
            if vmachine.name is None:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_created',
                                        'metadata': {'name': vm_object['name']}})
            elif vmachine.name != vm_object['name']:
                MessageController.fire(MessageController.Type.EVENT,
                                       {'type': 'vmachine_renamed',
                                        'metadata': {'old_name': vmachine.name,
                                                     'new_name': vm_object['name']}})
            if pmachine is not None:
                vmachine.pmachine = pmachine
            vmachine.name = vm_object['name']
            vmachine.hypervisor_id = vm_object['id']
            vmachine.devicename = vm_object['backing']['filename']
            vmachine.save()
            # Updating and linking disks
            storagedrivers = StorageDriverList.get_storagedrivers()
            datastores = {'{0}:{1}'.format(storagedriver.storage_ip, storagedriver.mountpoint): storagedriver
                          for storagedriver in storagedrivers}
            vdisk_guids = []
            mutex = volatile_mutex('{0}_{1}'.format(vmachine.name, vmachine.devicename))
            for disk in vm_object['disks']:
                ensure_safety = False
                if disk['datastore'] in vm_object['datastores']:
                    datastore = vm_object['datastores'][disk['datastore']]
                    if datastore in datastores:
                        try:
                            mutex.acquire(wait=10)
                            vdisk = VDiskList.get_by_devicename_and_vpool(disk['filename'], datastores[datastore].vpool)
                            if vdisk is None:
                                # The disk couldn't be located, but is in our datastore. We might be in a recovery scenario
                                vdisk = VDisk()
                                vdisk.vpool = datastores[datastore].vpool
                                vdisk.reload_client()
                                vdisk.devicename = disk['filename']
                                vdisk.volume_id = vdisk.storagedriver_client.get_volume_id(str(disk['backingfilename']))
                                vdisk.size = vdisk.info['volume_size']
                                vdisk.metadata = {'lba_size': vdisk.info['lba_size'],
                                                  'cluster_multiplier': vdisk.info['cluster_multiplier']}
                                # Create the disk in a locked context, but do not execute long-running tasks in the
                                # same context (a sketch of this lock-then-defer pattern follows this example)
                                vdisk.save()
                                ensure_safety = True
                        finally:
                            mutex.release()
                        if ensure_safety:
                            MDSServiceController.ensure_safety(vdisk)
                            VDiskController.dtl_checkup(vdisk_guid=vdisk.guid)

                        # Update the disk with information from the hypervisor
                        if vdisk.vmachine is None:
                            MessageController.fire(MessageController.Type.EVENT,
                                                   {'type': 'vdisk_attached',
                                                    'metadata': {'vmachine_name': vmachine.name,
                                                                 'vdisk_name': disk['name']}})
                        vdisk.vmachine = vmachine
                        vdisk.name = disk['name']
                        vdisk.order = disk['order']
                        vdisk.save()
                        vdisk_guids.append(vdisk.guid)
                        vdisks_synced += 1

            for vdisk in vmachine.vdisks:
                if vdisk.guid not in vdisk_guids:
                    MessageController.fire(MessageController.Type.EVENT,
                                           {'type': 'vdisk_detached',
                                            'metadata': {'vmachine_name': vmachine.name,
                                                         'vdisk_name': vdisk.name}})
                    vdisk.vmachine = None
                    vdisk.save()

            VMachineController._logger.info('Updating vMachine finished (name {0}, {1} vdisks (re)linked)'.format(
                vmachine.name, vdisks_synced
            ))
        except Exception as ex:
            VMachineController._logger.error('Error during vMachine update: {0}'.format(ex))
            raise
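The locked block in update_vmachine_config keeps only the cheap model creation under the mutex and defers the long-running ensure_safety and dtl_checkup calls until after release. A generic sketch of this lock-then-defer pattern; all names here are hypothetical, not framework APIs:

def create_locked_then_follow_up(mutex, lookup, create, followups, wait=10):
    # Create a record under a short-lived lock, then run the heavy follow-up
    # tasks outside the locked context so the mutex is never held for long.
    created = False
    mutex.acquire(wait=wait)
    try:
        record = lookup()
        if record is None:
            record = create()  # cheap: persist the model only
            created = True
    finally:
        mutex.release()
    if created:
        for task in followups:  # e.g. MDS safety check, DTL checkup
            task(record)
    return record

Keeping the heavy work out of the locked section matters because every other node contending for the same volatile_mutex would otherwise block behind it.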