def test_incrementing_sleep(self):
    r = Retrying(wait='incrementing_sleep',
                 wait_incrementing_start=500,
                 wait_incrementing_increment=100)
    self.assertEqual(500, r.wait(1, 6546))
    self.assertEqual(600, r.wait(2, 6546))
    self.assertEqual(700, r.wait(3, 6546))
def generate_job_postings_from_s3(
        s3_conn,
        s3_prefix: Text,
) -> JobPostingGeneratorType:
    """Stream all job listings from s3

    Args:
        s3_conn: a boto s3 connection
        s3_prefix: path to the job listings

    Yields:
        the next job listing, parsed from its JSON representation
        Refer to sample_job_listing.json for example structure
    """
    retrier = Retrying(retry_on_exception=retry_if_io_error,
                       wait_exponential_multiplier=100,
                       wait_exponential_max=100000)
    bucket_name, prefix = split_s3_path(s3_prefix)
    bucket = s3_conn.get_bucket(bucket_name)
    keys = bucket.list(prefix=prefix)
    for key in keys:
        logging.info('Extracting job postings from key {}'.format(key.name))
        with BytesIO() as outfile:
            retrier.call(key.get_contents_to_file, outfile, cb=log_download_progress)
            outfile.seek(0)
            for line in outfile:
                yield json.loads(line.decode('utf-8'))
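# A minimal consumption sketch (an assumption, not from the original source):
# boto.connect_s3 is the classic boto 2 entry point matching the bucket.list()
# and key.get_contents_to_file calls above; bucket and prefix are illustrative.
import boto

def count_postings(s3_prefix='example-bucket/job_postings/2015Q1'):
    s3_conn = boto.connect_s3()
    return sum(1 for _ in generate_job_postings_from_s3(s3_conn, s3_prefix))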
def test_exponential(self):
    r = Retrying(wait_exponential_max=100000)
    self.assertEqual(r.wait(1, 0), 2)
    self.assertEqual(r.wait(2, 0), 4)
    self.assertEqual(r.wait(3, 0), 8)
    self.assertEqual(r.wait(4, 0), 16)
    self.assertEqual(r.wait(5, 0), 32)
    self.assertEqual(r.wait(6, 0), 64)
def test_random_sleep_without_min(self):
    r = Retrying(wait_random_max=2000)
    times = set()
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    self.assertTrue(len(times) > 1)  # this is kind of non-deterministic...
    for t in times:
        self.assertTrue(t >= 0)
        self.assertTrue(t <= 2000)
def test_random_sleep_without_min(self):
    r = Retrying(wait='random_sleep', wait_random_max=2000)
    times = set()
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    self.assertTrue(len(times) > 1)  # this is kind of non-deterministic...
    for t in times:
        self.assertTrue(t >= 0)
        self.assertTrue(t <= 2000)
def test_random_sleep(self):
    r = Retrying(wait_random_min=1000, wait_random_max=2000)
    times = set()
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    times.add(r.wait(1, 6546))
    # this is kind of non-deterministic...
    self.assertTrue(len(times) > 1)
    for t in times:
        self.assertTrue(t >= 1000)
        self.assertTrue(t <= 2000)
def disable_node(self, node):
    node = hostname2node(node)
    print("The {} node will be disabled".format(node))
    args = {'argString': '-nodename {} -state disable'.format(node)}
    switch_power_off = requests.post(
        "{}/api/18/job/{}/run".format(env.rundeck_url, env.rundeck_job),
        headers=self.http_header,
        data=json.dumps(args),
        verify=False)
    response = switch_power_off.json()
    request = '{}/api/18/execution/{}/state?{}'.format(
        env.rundeck_url, response['id'], env.rundeck_job)
    try:
        Retrying(stop_max_delay=60000,
                 wait_fixed=500,
                 retry_on_result=lambda resp: resp is None
                 or resp.json().get('executionState') != 'SUCCEEDED') \
            .call(check_node, request, self.http_header)
    except Exception as e:
        abort("The {} node cannot be disabled:\n{}".format(node, e))
    print("The {} node is disabled".format(node))
def wrapped_f(*args, **kw):
    if _retry_init:
        rargs, rkw = _retry_init(dargs, dkw)
    else:
        rargs, rkw = dargs, dkw
    return Retrying(*rargs, **rkw).call(_warn_about_exceptions(f), *args, **kw)
def pop_jormungandr(cls):
    """launch the front end"""
    logging.getLogger(__name__).debug("running jormungandr")
    # jormungandr is launched with apache
    utils.launch_exec("sudo service apache2 status")
    ret, _ = utils.launch_exec("sudo service apache2 start")
    utils.launch_exec("sudo service apache2 status")
    assert ret == 0, "cannot start apache"
    # to have better errors, we check at the beginning that all is right
    for data_set in cls.data_sets:
        # we wait a bit for the kraken to be started
        try:
            Retrying(
                stop_max_delay=data_set.reload_timeout.total_seconds() * 1000,
                wait_fixed=data_set.fixed_wait.total_seconds() * 1000,
                retry_on_result=lambda x: x != "running",
            ).call(kraken_status, data_set)
        except RetryError as e:
            assert False, "region {r} KO, status={s}".format(
                r=data_set.name, s=e.last_attempt.value)
def stop_tyr_worker():
    if not start_or_stop_with_delay(
            'tyr_worker', delay=8000, wait=500, start=False, exc_raise=False):
        print(red("there are still tyr_worker alive, something is wrong"))
        if env.kill_ghost_tyr_worker:
            print(red('killing all workers'))

            def get_workers():
                with warn_only():
                    return run('ps -eo pid,command | grep [t]yr_worker')

            pids_to_kill = [
                s.split(' ', 1)[0] for s in get_workers().split('\n')
            ]
            sudo('kill -9 {pid}'.format(pid=" ".join(pids_to_kill)))
            try:
                # retry_on_result receives the call's result; retry while any
                # worker process is still listed
                Retrying(stop_max_delay=4000,
                         wait_fixed=1000,
                         retry_on_result=lambda _: get_workers()) \
                    .call(lambda: None)
            except RetryError:
                print(red('Some workers are still alive: {}'.format(
                    get_workers())))
                print(red("Aborting"))
                exit(1)
def start_or_stop_with_delay(service, delay, wait, start=True,
                             only_once=False, exc_raise=False):
    # TODO refactor to overcome the SSH problem with respect to "service start"
    # see: https://github.com/fabric/fabric/issues/395
    cmd = require.service.started if start else require.service.stopped
    retry_cond = (lambda x: not require.service.is_running(service)) if start \
        else (lambda x: require.service.is_running(service))
    if only_once:
        cmd(service)
        cmd = lambda x: None
    try:
        Retrying(stop_max_delay=delay, wait_fixed=wait,
                 retry_on_result=retry_cond).call(cmd, service)
    except RetryError as e:
        message = "Service {} {} failed: ".format(
            service, 'start' if start else 'stop') + repr(e)
        if exc_raise:
            raise RuntimeError(message)
        print(red(message))
        return False
    return True
def api_call(self, path, headers=None, retries=10, **kwargs):
    timeout = kwargs.pop("timeout", 10.0)
    opener = urllib2.build_opener()
    opener.addheaders = [("User-Agent", self.user_agent),
                         ("region-id", str(self.region))]
    req = urllib2.Request(self.API_URL.format(path), **kwargs)
    # Make the request, with retries
    retrier = Retrying(stop_max_attempt_number=retries,
                       wait_exponential_multiplier=500,
                       wait_exponential_max=5000,
                       retry_on_exception=retry_if_http_error,
                       wrap_exception=True)
    res = retrier.call(opener.open, req, timeout=timeout)
    return json.loads(res.read())
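# retry_if_http_error is not shown in this snippet; what follows is a plausible
# sketch (our assumption, not the original helper). retrying passes the raised
# exception to the retry_on_exception predicate and retries while it returns True.
import urllib2

def retry_if_http_error(exception):
    # Retry 5xx server errors and plain network failures, but not 4xx client errors.
    if isinstance(exception, urllib2.HTTPError):
        return exception.code >= 500
    return isinstance(exception, urllib2.URLError)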
def retry_wrapper(*args, **kwargs):
    channel = args[0]
    if not channel._active:
        raise PermanentError("Channel has been destroyed")
    return Retrying(
        wait_exponential_multiplier=_RETRY_WAIT_EXPONENTIAL_MULTIPLIER,
        wait_exponential_max=_RETRY_WAIT_EXPONENTIAL_MAX,
        retry_on_exception=channel._retry_if_not_consumer_error
    ).call(f, *args, **kwargs)
def wait_for_rabbitmq_cnx(self):
    """
    poll until the kraken is connected to rabbitmq

    small timeout because it must not be long
    (otherwise it may be a server configuration problem)
    """
    Retrying(stop_max_delay=1 * 1000, wait_fixed=50,
             retry_on_result=lambda status: get_not_null(
                 status['status'], 'is_connected_to_rabbitmq') is False) \
        .call(lambda: self.query_region('status'))
def poll_until_reload(self, previous_val):
    """
    poll until the kraken has reloaded its data

    check that the last_rt_data_loaded field is different from the first call
    """
    Retrying(stop_max_delay=10 * 1000, wait_fixed=100,
             retry_on_result=lambda status: get_not_null(
                 status['status'], 'last_rt_data_loaded') == previous_val) \
        .call(lambda: self.query_region('status'))
def test_direct_call(self):
    value = 1

    def working():
        return value

    r = Retrying()
    self.assertFalse(r.start_time)
    self.assertFalse(r.attempts)

    result = r(working)
    self.assertEqual(value, result)
    self.assertEqual(1, r.attempts)
def test_exponential_with_max_wait(self):
    r = Retrying(wait_exponential_max=40)
    self.assertEqual(r.wait(1, 0), 2)
    self.assertEqual(r.wait(2, 0), 4)
    self.assertEqual(r.wait(3, 0), 8)
    self.assertEqual(r.wait(4, 0), 16)
    self.assertEqual(r.wait(5, 0), 32)
    self.assertEqual(r.wait(6, 0), 40)
    self.assertEqual(r.wait(7, 0), 40)
    self.assertEqual(r.wait(50, 0), 40)
def test_exponential_with_max_wait_and_multiplier(self):
    r = Retrying(wait='exponential_sleep',
                 wait_exponential_max=50000,
                 wait_exponential_multiplier=1000)
    self.assertEqual(r.wait(1, 0), 2000)
    self.assertEqual(r.wait(2, 0), 4000)
    self.assertEqual(r.wait(3, 0), 8000)
    self.assertEqual(r.wait(4, 0), 16000)
    self.assertEqual(r.wait(5, 0), 32000)
    self.assertEqual(r.wait(6, 0), 50000)
    self.assertEqual(r.wait(7, 0), 50000)
    self.assertEqual(r.wait(50, 0), 50000)
def test_deployment():
    """Verify the kirin api is OK"""
    headers = {'Host': env.kirin_host}
    request = 'http://{}/status'.format(env.host_string)
    try:
        Retrying(stop_max_delay=30000, wait_fixed=100,
                 retry_on_result=lambda resp: resp is None
                 or resp.status_code != 200) \
            .call(check_node, request, headers)
    except Exception as e:
        abort(e)
    print("{} is OK".format(request))
def job_postings(s3_conn, quarter, s3_path, source="all"):
    """
    Stream all job listings from s3 for a given quarter

    Args:
        s3_conn: a boto s3 connection
        quarter: a string representing a quarter (2015Q1)
        s3_path: path to the job listings
        source: a string ("nlx", "va", "cb", or "all") or a list of those strings

    Yields:
        string in json format representing the next job listing
        Refer to sample_job_listing.json for example structure
    """
    retrier = Retrying(
        retry_on_exception=retry_if_io_error,
        wait_exponential_multiplier=100,
        wait_exponential_max=100000
    )
    bucket_name, prefix = split_s3_path(s3_path)
    bucket = s3_conn.get_bucket(bucket_name)
    if isinstance(source, str):
        if source.lower() == "all":
            keys = bucket.list(prefix='{}/{}'.format(prefix, quarter))
        else:
            keys = bucket.list(prefix='{}/{}/{}_'.format(prefix, quarter, source.upper()))
    elif isinstance(source, list):
        keys = []
        for s in source:
            keys.append(bucket.list(prefix='{}/{}/{}_'.format(prefix, quarter, s.upper())))
        keys = chain(*keys)
    for key in keys:
        logging.info('Extracting job postings from key {}'.format(key.name))
        with BytesIO() as outfile:
            retrier.call(key.get_contents_to_file, outfile, cb=log_download_progress)
            outfile.seek(0)
            for line in outfile:
                yield line.decode('utf-8')
def retrying_factory(**kwargs) -> Retrying:
    def _dont_retry_error_filter(e):
        """Return True if we should retry"""
        return not isinstance(e, (AuthError, BadInputError, FileNotFoundError))

    if "wait_exponential_multiplier" not in kwargs:
        kwargs["wait_exponential_multiplier"] = 250
    if "wait_exponential_max" not in kwargs:
        kwargs["wait_exponential_max"] = 10000
    if "stop_max_attempt_number" not in kwargs:
        kwargs["stop_max_attempt_number"] = 6
    if "retry_on_exception" not in kwargs:
        kwargs["retry_on_exception"] = _dont_retry_error_filter

    return Retrying(**kwargs)
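# Hedged usage example for the factory above: caller-supplied keyword arguments
# override the defaults, everything else falls back to them. fetch_profile is a
# hypothetical flaky callable used only for illustration.
import random

def fetch_profile(user_id):
    if random.random() < 0.5:
        raise IOError("transient backend error")
    return {"id": user_id}

retrier = retrying_factory(stop_max_attempt_number=3)
profile = retrier.call(fetch_profile, user_id=42)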
def test_exponential_with_max_wait_and_multiplier(self):
    r = Retrying(wait_exponential_max=50000,
                 wait_exponential_multiplier=1000)
    self.assertEqual(r.wait(1, 0), 2000)
    self.assertEqual(r.wait(2, 0), 4000)
    self.assertEqual(r.wait(3, 0), 8000)
    self.assertEqual(r.wait(4, 0), 16000)
    self.assertEqual(r.wait(5, 0), 32000)
    self.assertEqual(r.wait(6, 0), 50000)
    self.assertEqual(r.wait(7, 0), 50000)
    self.assertEqual(r.wait(50, 0), 50000)
def test_wait_aggregation_func(self):
    r = Retrying(wait_exponential_max=50000,
                 wait_exponential_multiplier=1000,
                 wait_fixed=1,
                 wait_aggregation_func=sum)
    self.assertEqual(r.wait(1, 0), 2001)
    self.assertEqual(r.wait(2, 0), 4001)
    self.assertEqual(r.wait(3, 0), 8001)
    self.assertEqual(r.wait(4, 0), 16001)
    self.assertEqual(r.wait(5, 0), 32001)
    self.assertEqual(r.wait(6, 0), 50001)
    self.assertEqual(r.wait(7, 0), 50001)
    self.assertEqual(r.wait(50, 0), 50001)
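# Inferred from the assertions above (not from library docs): when several wait
# strategies are configured, each computes its own delay and
# wait_aggregation_func combines the list of results, so with sum the 1 ms
# fixed wait is added to each exponential wait, e.g. 2000 + 1 == 2001.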
def start_or_stop_with_delay(service, delay, wait, start=True,
                             only_once=False, exc_raise=False):
    cmd = require.service.started if start else require.service.stopped
    retry_cond = (lambda x: not require.service.is_running(service)) if start \
        else (lambda x: require.service.is_running(service))
    if only_once:
        cmd(service)
        cmd = lambda x: None
    try:
        Retrying(stop_max_delay=delay, wait_fixed=wait,
                 retry_on_result=retry_cond).call(cmd, service)
    except RetryError as e:
        message = "Service {} {} failed: ".format(
            service, 'start' if start else 'stop') + repr(e)
        if exc_raise:
            raise RuntimeError(message)
        print(red(message))
        return False
    return True
def print_status():
    def check_and_print_response(query, header=None):
        response = check_node(query, header)
        if response is None or response.status_code != 200:
            return False
        print("")
        print("curl {}".format(query))
        print(response.json())
        print("")
        return True

    request = 'http://{}/status'.format(env.kirin_host)
    try:
        Retrying(stop_max_delay=30000, wait_fixed=100,
                 retry_on_result=lambda res: not res) \
            .call(check_and_print_response, request)
    except Exception as e:
        abort(e)
def _create_retrier(self, max_retries, max_retry_time):
    """
    Create the Retrier object used to process idempotent client requests.

    If only max_retries is set, the default max_retry_time is ignored.

    Args:
        max_retries (int): the number of retries to be attempted
        max_retry_time (int): the amount of time, in seconds, to retry for

    Returns:
        A Retrying instance, that implements a call(func) method.
    """
    # Client sets max_retries only
    if max_retries is not None and max_retry_time is None:
        stop_max_delay = None
        stop_max_attempt_number = max_retries + 1
        wait_exponential_multiplier = self.RETRY_DEFAULT_EXPONENTIAL_BACKOFF_MS
    else:
        stop_max_delay = (max_retry_time or self.RETRY_DEFAUT_MAX_RETRY_TIME_S) * 1000.0
        stop_max_attempt_number = (max_retries or self.RETRY_DEFAULT_MAX_RETRIES) + 1
        # Compute the backoff to allow for max_retries queries during the allowed delay.
        # Solves the following formula (assumes requests are immediate):
        #   max_retry_time = sum(exp_multiplier * 2 ** i) for i from 1 to max_retries + 1
        wait_exponential_multiplier = stop_max_delay / (
            (2 ** (stop_max_attempt_number + 1)) - 2)

    return Retrying(
        stop_max_attempt_number=stop_max_attempt_number,
        stop_max_delay=stop_max_delay,
        retry_on_exception=_hc_retry_on_exception,
        wait_exponential_multiplier=wait_exponential_multiplier,
        wait_jitter_max=self.RETRY_DEFAULT_JITTER_MS)
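# Worked check of the backoff formula above (our own arithmetic, under the
# snippet's stated assumption that requests return immediately): with
# max_retry_time=30 s and max_retries=4 the multiplier solves
# 30000 = m * (2**6 - 2), so m ~= 483.87 ms, and the successive waits roughly
# double from ~968 ms up to ~15484 ms, summing back to the 30 s budget.
stop_max_attempt_number = 4 + 1
stop_max_delay = 30 * 1000.0
multiplier = stop_max_delay / ((2 ** (stop_max_attempt_number + 1)) - 2)
waits = [multiplier * 2 ** i for i in range(1, stop_max_attempt_number + 1)]
assert abs(sum(waits) - stop_max_delay) < 1e-6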
def test_no_sleep(self):
    r = Retrying()
    self.assertEqual(0, r.wait(18, 9879))
def test_stop_func(self):
    r = Retrying(stop_func=lambda attempt, delay: attempt == delay)
    self.assertFalse(r.stop(1, 3))
    self.assertFalse(r.stop(100, 99))
    self.assertTrue(r.stop(101, 101))
def test_stop_after_delay(self):
    r = Retrying(stop_max_delay=1000)
    self.assertFalse(r.stop(2, 999))
    self.assertTrue(r.stop(2, 1000))
    self.assertTrue(r.stop(2, 1001))
def test_stop_after_attempt(self):
    r = Retrying(stop_max_attempt_number=3)
    self.assertFalse(r.stop(2, 6546))
    self.assertTrue(r.stop(3, 6546))
    self.assertTrue(r.stop(4, 6546))
def test_never_stop(self):
    r = Retrying()
    self.assertFalse(r.stop(3, 6546))
def test_wait_func(self):
    r = Retrying(wait_func=lambda attempt, delay: attempt * delay)
    self.assertEqual(r.wait(1, 5), 5)
    self.assertEqual(r.wait(2, 11), 22)
    self.assertEqual(r.wait(10, 100), 1000)
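# The stop_func/wait_func tests above pin down the custom-hook signature: both
# hooks receive (previous_attempt_number, delay_since_first_attempt_ms). A small
# combined sketch (our own example), capping a linear backoff at 2 s:
from retrying import Retrying

r = Retrying(wait_func=lambda attempts, delay: min(200 * attempts, 2000))
assert r.wait(3, 0) == 600    # 200 ms * 3 attempts
assert r.wait(20, 0) == 2000  # capped at 2 s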
def test_fixed_sleep(self):
    r = Retrying(wait_fixed=1000)
    self.assertEqual(1000, r.wait(12, 6546))
def test_incrementing_sleep(self):
    r = Retrying(wait_incrementing_start=500,
                 wait_incrementing_increment=100)
    self.assertEqual(500, r.wait(1, 6546))
    self.assertEqual(600, r.wait(2, 6546))
    self.assertEqual(700, r.wait(3, 6546))
def test_legacy_explicit_stop_type(self):
    Retrying(stop="stop_after_attempt")
def wrapped_f(*args, **kw):
    return Retrying(*dargs, **dkw).call(f, *args, **kw)
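# For context, a hedged sketch of how a decorator built on wrapped_f above is
# typically applied; this mirrors the public retrying.retry API, whose keyword
# arguments are forwarded to Retrying exactly as dargs/dkw are here.
import random
from retrying import retry

@retry(stop_max_attempt_number=3, wait_fixed=1000)
def flaky_call():
    # retried up to 3 times, sleeping a fixed 1000 ms between attempts
    if random.random() < 0.5:
        raise IOError("transient failure")
    return "ok"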
def _get(self, url, package):
    retry = Retrying(wait_exponential_multiplier=2000,
                     wait_exponential_max=120000,
                     retry_on_exception=_retry_msg)
    return retry.call(requests.get, url % package)