def _instance_wait_safe(self, instance_method, *args, **kwargs): """ Wrapper around GCE instance methods that is safer to use. Let's try a method, and if it fails, let's retry using an exponential backoff algorithm, similar to what Amazon recommends for it's own service [1]. :see: [1] http://docs.aws.amazon.com/general/latest/gr/api-retries.html """ threshold = 300 ok = False retries = 0 max_retries = 9 while not ok and retries <= max_retries: try: return instance_method(*args, **kwargs) except Exception as details: # pylint: disable=broad-except self.log.error('Call to method %s (retries: %s) failed: %s', instance_method, retries, details) time.sleep(min((2**retries) * 2, threshold)) retries += 1 if not ok: raise cluster.NodeError('GCE instance %s method call error after ' 'exponential backoff wait' % self.ec2_host.id)
def _instance_wait_safe(self, instance_method: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    """Invoke a GCE instance method under exponential-backoff retries.

    Delegates the retry policy to :func:`exponential_retry`; if every
    attempt fails, converts the tenacity timeout into a
    :class:`cluster.NodeError` (suppressing the retry traceback).
    """
    def attempt() -> R:
        return instance_method(*args, **kwargs)

    try:
        return exponential_retry(func=attempt, logger=self.log)
    except tenacity.RetryError:
        message = (f"Timeout while running '{instance_method.__name__}' "
                   f"method on GCE instance '{self._instance.id}'")
        raise cluster.NodeError(message) from None
def _instance_wait_safe(self, instance_method: Callable[P, R], *args: P.args, **kwargs: P.kwargs) -> R:
    """Invoke an AWS instance method under exponential-backoff retries.

    Delegates retrying to :func:`exponential_retry`; when all attempts are
    exhausted, dumps the instance's current metadata for debugging and
    raises :class:`cluster.NodeError` instead of the tenacity error.
    """
    try:
        # NOTE(review): logger=None here, while the GCE counterpart passes
        # self.log — presumably deliberate to avoid duplicate per-retry
        # logging, since failure details are logged below; confirm.
        return exponential_retry(func=lambda: instance_method(*args, **kwargs), logger=None)
    except tenacity.RetryError:
        try:
            # Refresh the cached instance state so the metadata dump below
            # reflects what AWS currently reports, not stale data.
            self._instance.reload()
        except Exception as ex:  # pylint: disable=broad-except
            # Best-effort: a failed reload must not hide the timeout error.
            LOGGER.exception("Error while reloading instance metadata: %s", ex)
        finally:
            # Runs whether or not reload() succeeded; then surface the
            # timeout as a NodeError, dropping the tenacity chain
            # (``from None``).
            LOGGER.debug(self._instance.meta.data)
            raise cluster.NodeError(
                f"Timeout while running '{instance_method.__name__}' method on AWS instance '{self._instance.id}'"
            ) from None
def _instance_wait_safe(self, instance_method, *args, **kwargs): """ Wrapper around AWS instance waiters that is safer to use. Since AWS adopts an eventual consistency model, sometimes the method wait_until_running will raise a botocore.exceptions.WaiterError saying the instance does not exist. AWS API guide [1] recommends that the procedure is retried using an exponencial backoff algorithm [2]. :see: [1] http://docs.aws.amazon.com/AWSEC2/latest/APIReference/query-api-troubleshooting.html#eventual-consistency :see: [2] http://docs.aws.amazon.com/general/latest/gr/api-retries.html """ threshold = 300 ok = False retries = 0 max_retries = 9 while not ok and retries <= max_retries: try: instance_method(*args, **kwargs) ok = True except WaiterError: time.sleep(min((2**retries) * 2, threshold)) retries += 1 if not ok: try: self._instance.reload() except Exception as ex: # pylint: disable=broad-except LOGGER.exception("Error while reloading instance metadata: %s", ex) finally: method_name = instance_method.__name__ instance_id = self._instance.id LOGGER.debug(self._instance.meta.data) msg = "Timeout while running '{method_name}' method on AWS instance '{instance_id}'".format( method_name=method_name, instance_id=instance_id) raise cluster.NodeError(msg)