def _wait_until_change_set_ready(self, change_set_name, status_key,
                                 desired_state, sleep=5):
    """Waits until a Change Set has hit the desired state.

    This loop waits until a Change Set has reached a desired state by
    comparing the value of the `status_key` with the `desired_state`. This
    allows the method to be used to check the status of the Change Set
    generation itself (status_key=Status) as well as the execution of the
    Change Set (status_key=ExecutionStatus).

    Args:
        change_set_name: The Change Set Request Name
        status_key: The key within the Change Set that defines its status
        desired_state: A string of the desired state we're looking for
        sleep: Time to wait between checks in seconds

    Returns:
        The final completed change set dictionary
    """
    self.log.info('Waiting for %s to reach %s' %
                  (change_set_name, desired_state))
    while True:
        try:
            change = yield self.api_call(
                self.cf3_conn.describe_change_set,
                ChangeSetName=change_set_name)
        except ClientError as e:
            # If we hit an intermittent error, let's just loop around and
            # try again.
            self.log.error('Error receiving Change Set state: %s' % e)
            yield utils.tornado_sleep(sleep)
            continue

        # The Change Set state can be 'AVAILABLE', or an IN_PROGRESS
        # string. In either case, we loop and wait.
        if change[status_key] in (('AVAILABLE',) + IN_PROGRESS):
            self.log.info('Change Set state is %s, waiting %s(s)...' %
                          (change[status_key], sleep))
            yield utils.tornado_sleep(sleep)
            continue

        # If the Change Set is in the desired state, then return it.
        if change[status_key] == desired_state:
            self.log.debug('Change Set reached desired state: %s' %
                           change[status_key])
            raise gen.Return(change)

        # Lastly, if we get here, then something is very wrong and we got
        # some funky status back. Throw an exception.
        msg = 'Unexpected Change Set state (%s) received (%s): %s' % (
            status_key, change[status_key],
            change.get('StatusReason', 'StatusReason not provided.'))
        raise StackFailed(msg)

def test_repeating_log(self):
    logger = mock.Mock()  # used for tracking

    # Repeat this message 10 times per second
    logid = utils.create_repeating_log(logger.info, 'test', seconds=0.1)
    yield utils.tornado_sleep(0.45)  # Some process takes .4 <> .5 seconds
    utils.clear_repeating_log(logid)

    self.assertEquals(logger.info.call_count, 4)

    # Let's make sure that we don't keep looping our log message.
    yield utils.tornado_sleep(0.2)
    self.assertEquals(logger.info.call_count, 4)

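# An illustrative sketch (not taken from the kingpin sources) of how
# create_repeating_log()/clear_repeating_log() are typically paired
# outside of a test: start the periodic message, run a long-running
# coroutine, then always clear the message. do_long_running_work is a
# hypothetical placeholder for whatever operation is being waited on.
import logging

from tornado import gen

from kingpin import utils

log = logging.getLogger(__name__)


@gen.coroutine
def run_with_progress_log(do_long_running_work):
    log_id = utils.create_repeating_log(
        log.info, 'Still working...', seconds=30)
    try:
        yield do_long_running_work()
    finally:
        # Stop the periodic message even if the work above fails.
        utils.clear_repeating_log(log_id)
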
def _wait_until_change_set_ready(self, change_set_name, status_key,
                                 desired_state, sleep=5):
    """Waits until a Change Set has hit the desired state.

    This loop waits until a Change Set has reached a desired state by
    comparing the value of the `status_key` with the `desired_state`. This
    allows the method to be used to check the status of the Change Set
    generation itself (status_key=Status) as well as the execution of the
    Change Set (status_key=ExecutionStatus).

    Args:
        change_set_name: The Change Set Request Name
        status_key: The key within the Change Set that defines its status
        desired_state: A string of the desired state we're looking for
        sleep: Time to wait between checks in seconds

    Returns:
        The final completed change set dictionary
    """
    self.log.info('Waiting for %s to reach %s' %
                  (change_set_name, desired_state))
    while True:
        try:
            change = yield self.api_call(
                self.cf3_conn.describe_change_set,
                ChangeSetName=change_set_name)
        except ClientError as e:
            # If we hit an intermittent error, let's just loop around and
            # try again.
            self.log.error('Error receiving change set state: %s' % e)
            yield utils.tornado_sleep(sleep)
            continue

        # The Change Set state can be 'AVAILABLE', or an IN_PROGRESS
        # string. In either case, we loop and wait.
        if change[status_key] in (('AVAILABLE',) + IN_PROGRESS):
            self.log.info('Change Set state is %s, waiting %s(s)...' %
                          (change[status_key], sleep))
            yield utils.tornado_sleep(sleep)
            continue

        # If the Change Set is in the desired state, then return it.
        if change[status_key] == desired_state:
            self.log.debug('Change Set reached desired state: %s' %
                           change[status_key])
            raise gen.Return(change)

        # Lastly, if we get here, then something is very wrong and we got
        # some funky status back. Throw an exception.
        msg = 'Unexpected Change Set state received (%s)' % \
            change[status_key]
        raise StackFailed(msg)

def _wait_until_empty(self, array, sleep=60):
    """Sleep until all array instances are terminated.

    This loop monitors the server array for its current live instance
    count and waits until the count hits zero before progressing.

    TODO: Add a timeout setting.

    Args:
        array: rightscale.Resource array object
        sleep: Integer time to sleep between checks (def: 60)
    """
    if self._dry:
        self.log.info('Pretending that array %s instances '
                      'are terminated.' % array.soul['name'])
        raise gen.Return()

    while True:
        instances = yield self._client.get_server_array_current_instances(
            array)
        count = len(instances)
        self.log.info('%s instances found' % count)

        if count < 1:
            raise gen.Return()

        # At this point, sleep
        self.log.debug('Sleeping..')
        yield utils.tornado_sleep(sleep)

def _wait(self, queue, sleep=3):
    """Sleeps until an SQS Queue has emptied out.

    Args:
        queue: AWS SQS Queue object
        sleep: Int of seconds to wait between checks

    Returns:
        True: When queue is empty.
    """
    count = 0

    while True:
        if not self._dry:
            self.log.debug('Counting %s' % queue.url)
            visible = yield self.thread(queue.count)
            attr = 'ApproximateNumberOfMessagesNotVisible'
            invisible = yield self.thread(queue.get_attributes, attr)
            invisible_int = int(invisible[attr])
            count = visible + invisible_int
        else:
            self.log.info('Pretending that count is 0 for %s' % queue.url)
            count = 0

        self.log.debug('Queue has %s messages in it.' % count)
        if count > 0:
            self.log.info('Waiting on %s to become empty...' % queue.name)
            yield utils.tornado_sleep(sleep)
        else:
            self.log.debug('Queue is empty!')
            break

    raise gen.Return(True)

def _execute(self):
    """Executes an actor and yields the results when it's finished.

    Raises:
        gen.Return()
    """
    elb = yield self._find_elb(name=self.option('name'))

    repeating_log = utils.create_repeating_log(
        self.log.info,
        'Still waiting for %s to become healthy' % self.option('name'),
        seconds=30)

    while True:
        healthy = yield self._is_healthy(elb, count=self.option('count'))

        if healthy is True:
            self.log.info('ELB is healthy.')
            break

        # In dry mode, fake it
        if self._dry:
            self.log.info('Pretending that ELB is healthy.')
            break

        # Not healthy :( continue looping
        self.log.debug('Retrying in 3 seconds.')
        yield utils.tornado_sleep(3)

    utils.clear_repeating_log(repeating_log)

    raise gen.Return()

def _wait(self, queue, sleep=3):
    """Sleeps until an SQS Queue has emptied out.

    Args:
        queue: AWS SQS Queue object
        sleep: Int of seconds to wait between checks

    Returns:
        True: When queue is empty.
    """
    count = 0

    while True:
        if not self._dry:
            self.log.debug('Counting %s' % queue.url)
            visible = yield self.api_call(queue.count)
            attr = 'ApproximateNumberOfMessagesNotVisible'
            invisible = yield self.api_call(queue.get_attributes, attr)
            invisible_int = int(invisible[attr])
            count = visible + invisible_int
        else:
            self.log.info('Pretending that count is 0 for %s' % queue.url)
            count = 0

        self.log.debug('Queue has %s messages in it.' % count)
        if count > 0:
            self.log.info('Waiting on %s to become empty...' % queue.name)
            yield utils.tornado_sleep(sleep)
        else:
            self.log.debug('Queue is empty!')
            break

    raise gen.Return(True)

def integration_02a_test_unpause(self):
    actor = pingdom.Unpause('Unpause check', {'name': self.check_name})
    yield actor.execute()
    yield utils.tornado_sleep(30)  # Let Pingdom cache settle
    check = yield actor._get_check()
    self.assertEqual(check['status'], 'up')

def integration_02a_test_unpause(self):
    actor = pingdom.Unpause('Unpause check', {'name': self.check_name})
    yield actor.execute()
    yield utils.tornado_sleep(30)  # Let Pingdom cache settle
    check = yield actor._get_check()
    self.assertEquals(check['status'], 'up')

def _execute(self):
    """Executes an actor and yields the results when it's finished."""
    self.log.debug('Sleeping for %s seconds' % self.option('sleep'))

    sleep = self.option('sleep')
    if isinstance(sleep, basestring):
        sleep = float(sleep)

    if not self._dry:
        yield utils.tornado_sleep(seconds=sleep)

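# A hedged usage sketch based on the actor pattern used by the
# integration tests in this section: construct the actor with a
# description string and an options dict, then yield actor.execute().
# The misc.Sleep class name and import path are assumptions about which
# actor owns the _execute() above; the 'sleep' option comes from the
# code itself.
from tornado import gen

from kingpin.actors import misc  # assumed import path


@gen.coroutine
def pause_between_steps():
    actor = misc.Sleep('Wait for caches to settle', {'sleep': 30})
    yield actor.execute()
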
def integration_02b_monitor_queue(self):
    actor = sqs.WaitUntilEmpty('Wait until empty',
                               {'name': self.queue_name,
                                'region': self.region})
    log.debug('New queue should be empty')
    queue = actor.sqs_conn.get_queue(self.queue_name)
    self.assertEquals(queue.count(), 0)

    done = yield actor.execute()
    yield utils.tornado_sleep()
    self.assertEquals(done, None)

def integration_02_wait_for_elb_health(self):
    actor = elb.WaitUntilHealthy(
        'Test',
        {'name': self.elb_name,
         'count': 1,
         'region': self.region})

    # NOTE: We are not "yielding" the execution here, but the task
    # goes on top of the IOLoop. The sleep statement below allows
    # the wait_task's actions to get executed by tornado's loop.
    wait_task = actor.execute()
    yield utils.tornado_sleep(1)

    # Expected count is 1, so this should not be done yet...
    self.assertFalse(wait_task.done())

def _wait_until_state(self, desired_states, sleep=15):
    """Indefinite loop until a stack has finished creating/deleting.

    Whether the stack has failed, succeeded or been rolled back... this
    method loops until the process has finished. If the final status is a
    failure (rollback/failed) then an exception is raised.

    Args:
        desired_states: (tuple/list) States that indicate a successful
                        operation.
        sleep: (int) Time in seconds between stack status checks

    Raises:
        StackNotFound: If the stack doesn't exist.
    """
    while True:
        stack = yield self._get_stack(self.option('name'))

        if not stack:
            msg = 'Stack "%s" not found.' % self.option('name')
            raise StackNotFound(msg)

        self.log.debug('Got stack %s status: %s' %
                       (stack.stack_name, stack.stack_status))

        # First, let's see if the stack is still in progress (either
        # creation, deletion, or rollback .. doesn't really matter)
        if stack.stack_status in IN_PROGRESS:
            self.log.info('Stack is in %s, waiting %s(s)...' %
                          (stack.stack_status, sleep))
            yield utils.tornado_sleep(sleep)
            continue

        # If the stack is in the desired state, then return
        if stack.stack_status in desired_states:
            self.log.info('Stack execution completed, final state: %s' %
                          stack.stack_status)
            raise gen.Return()

        # Lastly, if we get here, then something is very wrong and we got
        # some funky status back. Throw an exception.
        msg = 'Unexpected stack state received (%s)' % stack.stack_status
        raise CloudFormationError(msg)

def test_execute_retry(self):
    actor = elb_actor.WaitUntilHealthy(
        'Unit Test Action',
        {'name': 'unit-test-queue',
         'region': 'us-west-2',
         'count': 3})

    actor._find_elb = mock.Mock(return_value=helper.tornado_value('ELB'))
    actor._is_healthy = mock.Mock(
        side_effect=[helper.tornado_value(False),
                     helper.tornado_value(True)])

    # Optional mock -- making the test quicker.
    short_sleep = utils.tornado_sleep(0)
    with mock.patch('kingpin.utils.tornado_sleep') as ts:
        ts.return_value = short_sleep
        val = yield actor._execute()

    self.assertEquals(actor._find_elb.call_count, 1)  # Don't refetch!
    self.assertEquals(actor._is_healthy.call_count, 2)  # Retry!
    self.assertEquals(val, None)

def _wait_on_draining(self, elb):
    """Waits for the ELB Connection Draining to occur.

    ELB Connection Draining is a configured setting on the ELB that will
    continue to allow existing connections to be handled before finally
    cutting them off at the timeout. This method will detect if
    connection draining is enabled, and optionally "sleep" for that time
    period before returning from the actor.

    Args:
        elb: boto Loadbalancer object
    """
    if not self.str2bool(self.option('wait_on_draining')):
        self.log.warning('Not waiting for connections to drain!')
        # Bail out early so we actually skip the wait below.
        raise gen.Return()

    attrs = yield self.thread(elb.get_attributes)
    if attrs.connection_draining.enabled:
        timeout = attrs.connection_draining.timeout
        self.log.info('Connection Draining Enabled, waiting %s(s)' %
                      timeout)
        yield utils.tornado_sleep(timeout)

def _wait_until_healthy(self, array, sleep=60):
    """Sleep until a server array has its min_count servers running.

    This loop monitors the server array for its current live instance
    count and waits until the count has reached the min_count before
    progressing.

    TODO: Add a timeout setting.

    Args:
        array: rightscale.Resource array object
        sleep: Integer time to sleep between checks (def: 60)
    """
    if self._dry:
        self.log.info('Pretending that array %s instances are launched.'
                      % array.soul['name'])
        raise gen.Return()

    # Get the min_count from the count number supplied to the actor (if
    # it was), or fall back to the current min_count setting from the
    # ServerArray object.
    min_count = int(self._options.get('count', False))
    if not min_count:
        min_count = int(array.soul['elasticity_params']
                        ['bounds']['min_count'])

    while True:
        instances = yield self._client.get_server_array_current_instances(
            array, filters=['state==operational'])
        count = len(instances)
        self.log.info('%s instances found, waiting for %s' %
                      (count, min_count))

        if min_count <= count:
            raise gen.Return()

        # At this point, sleep
        self.log.debug('Sleeping..')
        yield utils.tornado_sleep(sleep)

def test_execute_retry(self):
    actor = elb_actor.WaitUntilHealthy('Unit Test Action', {
        'name': 'unit-test-queue',
        'region': 'us-west-2',
        'count': 3
    })
    actor._find_elb = mock.Mock(return_value=helper.tornado_value('ELB'))
    actor._is_healthy = mock.Mock(side_effect=[
        helper.tornado_value(False),
        helper.tornado_value(True)
    ])

    # Optional mock -- making the test quicker.
    short_sleep = utils.tornado_sleep(0)
    with mock.patch('kingpin.utils.tornado_sleep') as ts:
        ts.return_value = short_sleep
        val = yield actor._execute()

    self.assertEquals(actor._find_elb.call_count, 1)  # Don't refetch!
    self.assertEquals(actor._is_healthy.call_count, 2)  # Retry!
    self.assertEquals(val, None)

def false(self):
    yield utils.tornado_sleep(0.01)
    raise gen.Return(False)

def wrapper(self, *args, **kwargs):
    i = 1

    # Get a list of private kwargs to mask
    private_kwargs = getattr(self, '_private_kwargs', [])

    # For security purposes, create a patched kwargs string that
    # removes passwords from the arguments. This is never guaranteed to
    # work (an API could have 'foo' as their password field, and we
    # just won't know ...), but we make a best effort here.
    safe_kwargs = dict(kwargs)
    remove = [k for k in safe_kwargs if k in private_kwargs]
    for k in remove:
        safe_kwargs[k] = '****'

    while True:
        # Don't log out the first try as a 'Try' ... just do it
        if i > 1:
            log.debug('Try (%s/%s) of %s(%s, %s)' %
                      (i, retries, f, args, safe_kwargs))

        # Attempt the method. Catch any exception listed in
        # self._EXCEPTIONS.
        try:
            ret = yield gen.coroutine(f)(self, *args, **kwargs)
            raise gen.Return(ret)
        except tuple(self._EXCEPTIONS.keys()) as e:
            error = str(e)
            if hasattr(e, 'message'):
                error = e.message
            log.warning('Exception raised on try %s: %s' % (i, error))

            # If we've run out of retry attempts, raise the exception
            if i >= retries:
                log.debug('Raising exception: %s' % e)
                raise e

            # Gather the config for this exception-type from
            # self._EXCEPTIONS. Iterate through the data and see if we
            # have a matching exception string.
            exc_conf = self._EXCEPTIONS[type(e)].copy()

            # An empty string for the key is the default exception.
            # It's optional, but can match before others match, so we
            # pop it before searching.
            default_exc = exc_conf.pop('', False)
            log.debug('Searching through %s' % exc_conf)
            matched_exc = [
                exc for key, exc in exc_conf.items() if key in str(e)
            ]

            log.debug('Matched exceptions: %s' % matched_exc)
            if matched_exc and matched_exc[0] is not None:
                exception = matched_exc[0]
                log.debug('Matched exception: %s' % exception)
                raise exception(error)
            elif matched_exc and matched_exc[0] is None:
                log.debug('Exception is retryable!')
                pass
            elif default_exc is not False:
                raise default_exc(str(e))
            elif default_exc is False:
                # Reaching this part means no exception was matched
                # and no default was specified.
                log.debug('No explicit behavior for this exception'
                          ' found. Raising.')
                raise e

            # Must have been a retryable exception. Retry.
            i = i + 1
            log.debug('Retrying in %s...' % delay)
            yield utils.tornado_sleep(delay)

        log.debug('Retrying..')

def wrapper(self, *args, **kwargs):
    i = 1

    # Get a list of private kwargs to mask
    private_kwargs = getattr(self, '_private_kwargs', [])

    # For security purposes, create a patched kwargs string that
    # removes passwords from the arguments. This is never guaranteed to
    # work (an API could have 'foo' as their password field, and we
    # just won't know ...), but we make a best effort here.
    safe_kwargs = dict(kwargs)
    remove = [k for k in safe_kwargs if k in private_kwargs]
    for k in remove:
        safe_kwargs[k] = '****'

    while True:
        # Don't log out the first try as a 'Try' ... just do it
        if i > 1:
            log.debug('Try (%s/%s) of %s(%s, %s)' %
                      (i, retries, f, args, safe_kwargs))

        # Attempt the method. Catch any exception listed in
        # self._EXCEPTIONS.
        try:
            ret = yield gen.coroutine(f)(self, *args, **kwargs)
            raise gen.Return(ret)
        except tuple(self._EXCEPTIONS.keys()) as e:
            error = str(e)
            if hasattr(e, 'message'):
                error = e.message
            log.warning('Exception raised on try %s: %s' % (i, error))

            # If we've run out of retry attempts, raise the exception
            if i >= retries:
                log.debug('Raising exception: %s' % e)
                raise e

            # Gather the config for this exception-type from
            # self._EXCEPTIONS. Iterate through the data and see if we
            # have a matching exception string.
            exc_conf = self._EXCEPTIONS[type(e)].copy()

            # An empty string for the key is the default exception.
            # It's optional, but can match before others match, so we
            # pop it before searching.
            default_exc = exc_conf.pop('', False)
            log.debug('Searching through %s' % exc_conf)
            matched_exc = [exc for key, exc in exc_conf.items()
                           if key in str(e)]

            log.debug('Matched exceptions: %s' % matched_exc)
            if matched_exc and matched_exc[0] is not None:
                exception = matched_exc[0]
                log.debug('Matched exception: %s' % exception)
                raise exception(error)
            elif matched_exc and matched_exc[0] is None:
                log.debug('Exception is retryable!')
                pass
            elif default_exc is not False:
                raise default_exc(str(e))
            elif default_exc is False:
                # Reaching this part means no exception was matched
                # and no default was specified.
                log.debug('No explicit behavior for this exception'
                          ' found. Raising.')
                raise e

            # Must have been a retryable exception. Retry.
            i = i + 1
            log.debug('Retrying in %s...' % delay)
            yield utils.tornado_sleep(delay)

        log.debug('Retrying..')

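# The sketch below is a deliberately simplified illustration of the same
# retry-with-delay pattern implemented by the wrapper above: attempt a
# coroutine, catch a retryable error, sleep via utils.tornado_sleep()
# between attempts, and re-raise once the retry budget is exhausted. It
# is not the kingpin implementation; retryable_call and TransientError
# are hypothetical placeholders.
import logging

from tornado import gen

from kingpin import utils

log = logging.getLogger(__name__)


class TransientError(Exception):
    """Hypothetical retryable error type."""


@gen.coroutine
def call_with_retries(retryable_call, retries=3, delay=0.25):
    for attempt in range(1, retries + 1):
        try:
            result = yield retryable_call()
            raise gen.Return(result)
        except TransientError as e:
            log.warning('Attempt %s/%s failed: %s' % (attempt, retries, e))
            if attempt >= retries:
                raise
            yield utils.tornado_sleep(delay)
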
def wait_for_task(self, task, task_name=None, sleep=5, loc_log=log,
                  instance=None):
    """Monitors a RightScale task for completion.

    RightScale tasks are provided as URLs that we can query for the
    run-status of the task. This method repeatedly queries a task for
    completion (every 5 seconds), and returns when the task has finished.

    TODO: Add a task-timeout option.

    Note: This is a completely retryable operation in the event that an
    intermittent network connection causes any kind of a connection
    failure.

    Args:
        task: RightScale Task resource object.
        task_name: Human-readable name of the task to be executed.
        sleep: Integer of seconds to wait before the first status check.
        loc_log: logging.getLogger() object to be used to log task status.
            This is useful when this API call is called from a Kingpin
            actor, and you want to use the actor's specific logger. If
            nothing is passed - local `log` object is used.
        instance: RightScale instance object on which the task is
            executed.

    Returns:
        bool: success status
    """
    if not task:
        # If there is no task to wait on - don't wait!
        raise gen.Return(True)

    timeout_id = None
    if task_name:
        timeout_id = utils.create_repeating_log(loc_log.info,
                                                'Still waiting on %s' %
                                                task_name,
                                                seconds=sleep)

    # Tracking when the tasks start so we can search by date later.
    # RightScale expects the time to be a string in UTC.
    now = datetime.utcnow()
    tasks_start = now.strftime('%Y/%m/%d %H:%M:%S +0000')

    while True:
        # Get the task status
        output = yield self._get_task_info(task)
        summary = output.soul['summary'].lower()
        stamp = datetime.now()

        if 'success' in summary or 'completed' in summary:
            status = True
            break

        if 'failed' in summary:
            status = False
            break

        loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                      (output.path, output.soul['summary'], stamp))
        yield utils.tornado_sleep(min(sleep, 5))

    loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                  (output.path, output.soul['summary'], stamp))

    if timeout_id:
        utils.clear_repeating_log(timeout_id)

    if status is True:
        raise gen.Return(True)

    if not instance:
        raise gen.Return(status)

    # If something failed we want to find out why -- get audit logs.
    # Contact RightScale for audit logs of this instance.
    now = datetime.utcnow()
    tasks_finish = now.strftime('%Y/%m/%d %H:%M:%S +0000')

    loc_log.error('Task failed. Instance: "%s".' % instance.soul['name'])

    audit_logs = yield self.get_audit_logs(instance=instance,
                                           start=tasks_start,
                                           end=tasks_finish,
                                           match='failed')

    # Print every audit log that was obtained (may be 0)
    [loc_log.error(l) for l in audit_logs]

    if not audit_logs:
        loc_log.error('No audit logs for %s' % instance)

    loc_log.debug('Task finished, return value: %s, summary: %s' %
                  (status, summary))

    raise gen.Return(status)

def testTornadoSleep(self):
    start = time.time()
    yield utils.tornado_sleep(0.1)
    stop = time.time()
    self.assertTrue(stop - start > 0.1)

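# For reference, a helper like tornado_sleep() can be written as a thin
# coroutine wrapper around Tornado's own timer support. This is an
# illustrative sketch for Tornado >= 4.1 and is not necessarily the
# kingpin implementation.
from tornado import gen


@gen.coroutine
def tornado_sleep(seconds=1.0):
    # gen.sleep() returns a Future that resolves after `seconds`.
    yield gen.sleep(seconds)
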
def true(self):
    yield utils.tornado_sleep(0.01)
    raise gen.Return(True)

def wait_for_task(self, task, task_name=None, sleep=5, loc_log=log,
                  instance=None):
    """Monitors a RightScale task for completion.

    RightScale tasks are provided as URLs that we can query for the
    run-status of the task. This method repeatedly queries a task for
    completion (every 5 seconds), and returns when the task has finished.

    TODO: Add a task-timeout option.

    Note: This is a completely retryable operation in the event that an
    intermittent network connection causes any kind of a connection
    failure.

    Args:
        task: RightScale Task resource object.
        task_name: Human-readable name of the task to be executed.
        sleep: Integer of seconds to wait before the first status check.
        loc_log: logging.getLogger() object to be used to log task status.
            This is useful when this API call is called from a Kingpin
            actor, and you want to use the actor's specific logger. If
            nothing is passed - local `log` object is used.
        instance: RightScale instance object on which the task is
            executed.

    Returns:
        bool: success status
    """
    if not task:
        # If there is no task to wait on - don't wait!
        raise gen.Return(True)

    timeout_id = None
    if task_name:
        timeout_id = utils.create_repeating_log(
            loc_log.info, 'Still waiting on %s' % task_name, seconds=sleep)

    # Tracking when the tasks start so we can search by date later.
    # RightScale expects the time to be a string in UTC.
    now = datetime.utcnow()
    tasks_start = now.strftime('%Y/%m/%d %H:%M:%S +0000')

    while True:
        # Get the task status
        output = yield self._get_task_info(task)
        summary = output.soul['summary']
        stamp = datetime.now()

        if 'success' in summary or 'completed' in summary:
            status = True
            break

        if 'failed' in summary:
            status = False
            break

        loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                      (output.path, output.soul['summary'], stamp))
        yield utils.tornado_sleep(min(sleep, 5))

    loc_log.debug('Task (%s) status: %s (updated at: %s)' %
                  (output.path, output.soul['summary'], stamp))

    if timeout_id:
        utils.clear_repeating_log(timeout_id)

    if status is True:
        raise gen.Return(True)

    if not instance:
        raise gen.Return(status)

    # If something failed we want to find out why -- get audit logs.
    # Contact RightScale for audit logs of this instance.
    now = datetime.utcnow()
    tasks_finish = now.strftime('%Y/%m/%d %H:%M:%S +0000')

    loc_log.error('Task failed. Instance: "%s".' % instance.soul['name'])

    audit_logs = yield self.get_audit_logs(
        instance=instance,
        start=tasks_start,
        end=tasks_finish,
        match='failed')

    # Print every audit log that was obtained (may be 0)
    [loc_log.error(l) for l in audit_logs]

    if not audit_logs:
        loc_log.error('No audit logs for %s' % instance)

    loc_log.debug('Task finished, return value: %s, summary: %s' %
                  (status, summary))

    raise gen.Return(status)

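# A hedged usage sketch of wait_for_task() above, based only on its
# signature and on the self._client pattern used elsewhere in this
# section. start_task() and TaskFailed are hypothetical placeholders;
# the keyword arguments mirror the method definition.
@gen.coroutine
def _run_and_wait(self, instance):
    task = yield self.start_task(instance)  # hypothetical task launcher
    success = yield self._client.wait_for_task(
        task,
        task_name='Booting %s' % instance.soul['name'],
        sleep=5,
        loc_log=self.log,
        instance=instance)
    if not success:
        raise TaskFailed('Task failed on %s' % instance.soul['name'])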