def get_resource_manager_lock(name):
    """
    Block until this process holds the resource manager lock.

    Every pass through the loop does three things, in order:

    1. Deletes ResourceManagerLock records whose timestamp is at least
       constants.PULP_PROCESS_TIMEOUT_INTERVAL seconds in the past.
    2. Upserts the Worker record for ``name`` with a fresh heartbeat, so that a waiting
       resource manager still shows up in the Status API. That record is removed by the
       regular worker shutdown routine, not by this function.
    3. Tries to save this process's own lock record. If the save is rejected with
       mongoengine.NotUniqueError, the process sleeps for
       constants.PULP_PROCESS_HEARTBEAT_INTERVAL seconds and starts over.

    :param name: The hostname of the worker; it must start with
                 constants.RESOURCE_MANAGER_WORKER_NAME
    :type  name: basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Becomes True once a save attempt has been rejected, i.e. we have been waiting as a spare
    had_to_wait = False

    while True:
        now = dateutils.ensure_tz(datetime.utcnow())
        expiry_cutoff = now - timedelta(seconds=constants.PULP_PROCESS_TIMEOUT_INTERVAL)
        ResourceManagerLock.objects(timestamp__lte=expiry_cutoff).delete()

        # Keep our worker record fresh so that Pulp knows this process exists
        Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(), upsert=True)

        lock.timestamp = now
        try:
            lock.save()
        except mongoengine.NotUniqueError:
            # The spare notice is logged only on the first rejected attempt
            if not had_to_wait:
                _logger.info(
                    _("Hot spare pulp_resource_manager instance '%(name)s' detected.") %
                    {'name': name})
                had_to_wait = True
            time.sleep(constants.PULP_PROCESS_HEARTBEAT_INTERVAL)
            continue

        _logger.debug(_("Resource manager '%s' has acquired the resource manager lock") % name)
        if had_to_wait:
            # We only got the lock after waiting, so this is a takeover from another instance
            _logger.warning(
                _("Failover occurred: '%s' is now the primary resource manager") % name)
        return
def get_resource_manager_lock(name):
    """
    Wait for, and then take, the resource manager lock.

    The function loops until its own ResourceManagerLock record has been saved. On each
    attempt it first deletes lock records whose timestamp is at least
    PULP_PROCESS_TIMEOUT_INTERVAL seconds old, then upserts the Worker record for ``name``
    with a new heartbeat (so the waiting resource manager is visible in the Status API; the
    record is cleaned up by the regular worker shutdown routine), and finally tries to save
    the lock. A mongoengine.NotUniqueError from the save means the lock was not obtained; the
    function then sleeps for PULP_PROCESS_HEARTBEAT_INTERVAL seconds before the next attempt.

    :param name: The hostname of the worker; it must start with
                 constants.RESOURCE_MANAGER_WORKER_NAME
    :type  name: basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Number of acquisition attempts made so far; anything above 1 means we had to wait
    attempt = 0
    acquired = False

    while not acquired:
        attempt += 1

        current_time = dateutils.ensure_tz(datetime.utcnow())
        ResourceManagerLock.objects(
            timestamp__lte=current_time - timedelta(seconds=PULP_PROCESS_TIMEOUT_INTERVAL)
        ).delete()

        # Create / refresh the worker record so that Pulp knows we exist
        Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(), upsert=True)

        try:
            lock.timestamp = current_time
            lock.save()
        except mongoengine.NotUniqueError:
            # Announce ourselves as a hot spare once, on the very first failed attempt
            if attempt == 1:
                spare_msg = _("Hot spare pulp_resource_manager instance '%(name)s' detected.")
                _logger.info(spare_msg % {'name': name})
            time.sleep(PULP_PROCESS_HEARTBEAT_INTERVAL)
        else:
            acquired_msg = _("Resource manager '%s' has acquired the resource manager lock")
            _logger.debug(acquired_msg % name)
            if attempt > 1:
                failover_msg = _("Failover occurred: '%s' is now the primary resource manager")
                _logger.warning(failover_msg % name)
            acquired = True
def get_resource_manager_lock(name):
    """
    Tries to acquire the resource manager lock. If the lock cannot be acquired immediately, it
    will wait until the currently active instance becomes unavailable, at which point the worker
    cleanup routine will clear the lock for us to acquire. A worker record will be created so that
    the waiting resource manager will appear in the Status API.

    The whole wait runs inside the custom_sigterm_handler(name) context manager, which overrides
    the SIGTERM signal handler so that the worker record will be immediately cleaned up if the
    process is killed while in this state.

    While waiting, the worker record's heartbeat is refreshed and the lock is retried every
    constants.CELERY_CHECK_INTERVAL seconds.

    :param name: The hostname of the worker; it must start with
                 constants.RESOURCE_MANAGER_WORKER_NAME
    :type  name: basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    with custom_sigterm_handler(name):
        # Whether this is the first lock availability check for this instance
        _first_check = True

        while True:
            # Create / update the worker record so that Pulp knows we exist
            Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(),
                                                 upsert=True)
            try:
                lock.save()

                # Translate the constant template first and interpolate afterwards. Formatting
                # inside _() would hand gettext an already-filled string that can never match
                # a catalog entry.
                msg = _("Resource manager '%s' has acquired the resource manager lock") % name
                _logger.info(msg)
                break
            except mongoengine.NotUniqueError:
                # Only log the message the first time
                if _first_check:
                    # Named placeholders let translators reorder the two values
                    msg = _("Resource manager '%(name)s' attempted to acquire the the resource "
                            "manager lock but was unable to do so. It will retry every "
                            "%(interval)d seconds until the lock can be acquired.") % \
                        {'name': name, 'interval': constants.CELERY_CHECK_INTERVAL}
                    _logger.info(msg)
                    _first_check = False

                time.sleep(constants.CELERY_CHECK_INTERVAL)
def get_resource_manager_lock(name):
    """
    Block until this process has saved its resource manager lock record.

    On every attempt the Worker record for ``name`` is upserted with a new heartbeat, so that a
    waiting resource manager appears in the Status API (that record is cleaned up through the
    regular worker shutdown routine). The lock save is then attempted; if it is rejected with
    mongoengine.NotUniqueError the process sleeps for constants.CELERY_CHECK_INTERVAL seconds
    and tries again.

    This function never clears an existing lock itself. Per the original documentation, the
    worker cleanup routine does that once the currently active instance becomes unavailable.

    :param name: The hostname of the worker; it must start with
                 constants.RESOURCE_MANAGER_WORKER_NAME
    :type  name: basestring
    """
    assert name.startswith(constants.RESOURCE_MANAGER_WORKER_NAME)

    lock = ResourceManagerLock(name=name)

    # Set once the "unable to acquire" notice has been written, so it is logged a single time
    retry_notice_logged = False

    while True:
        # Keep our worker record fresh so that Pulp knows this process exists
        Worker.objects(name=name).update_one(set__last_heartbeat=datetime.utcnow(), upsert=True)

        try:
            lock.save()
        except mongoengine.NotUniqueError:
            if not retry_notice_logged:
                template = _("Resource manager '%(name)s' attempted to acquire the the resource "
                             "manager lock but was unable to do so. It will retry every "
                             "%(interval)d seconds until the lock can be acquired.")
                _logger.info(
                    template % {'name': name, 'interval': constants.CELERY_CHECK_INTERVAL})
                retry_notice_logged = True
            time.sleep(constants.CELERY_CHECK_INTERVAL)
            continue

        _logger.info(_("Resource manager '%s' has acquired the resource manager lock") % name)
        return