def __init__(self, event, context):
    """
    Initializes handler.

    Copies the fields of the tracking-table event onto the instance, resolves the
    action class and its properties, creates the AWS session for the (optional)
    assumed role and sets up the logger for the execution log stream.

    :param event: Event to handle, a dict-like item keyed by the tracking.TASK_TR_* names
    :param context: Context if run within Lambda environment
    :raises KeyError: if the event has no task id, task name or action entry
    """
    self._context = context
    self._event = event
    self._action_tracking = TaskTrackingTable(context)

    self.action_id = self._event[tracking.TASK_TR_ID]
    self.task = self._event[tracking.TASK_TR_NAME]
    self.action = self._event[tracking.TASK_TR_ACTION]

    # resolve the action class once; it is needed for the completion method and kept on the instance
    self.action_class = actions.get_action_class(self.action)
    self.test_completion_method = getattr(self.action_class,
                                          handlers.COMPLETION_METHOD, None)

    # parameters and resources are read from the event as JSON strings
    self.action_parameters = json.loads(
        self._event.get(tracking.TASK_TR_PARAMETERS, "{}"))
    self.action_resources = json.loads(
        self._event.get(tracking.TASK_TR_RESOURCES, "{}"))

    self.dryrun = self._event.get(tracking.TASK_TR_DRYRUN)
    self.debug = self._event.get(tracking.TASK_TR_DEBUG)
    self.started_at = float(self._event.get(tracking.TASK_TR_STARTED_TS, 0))
    self.start_result = self._event.get(tracking.TASK_TR_START_RESULT, None)

    self.session = AwsService.get_session(
        self._event.get(tracking.TASK_TR_ASSUMED_ROLE))

    self.stack_name = os.getenv(handlers.ENV_STACK_NAME)
    self.stack_id = os.getenv(handlers.ENV_STACK_ID)

    self.action_properties = actions.get_action_properties(self.action)
    # filled lazily the first time the stack resources are needed
    self._stack_resources = None

    self.timeout = self._event.get(tracking.TASK_TR_TIMEOUT)

    # setup logging, build a name for the log stream only if the event does not carry one
    self.execution_log_stream = self._event.get(
        tracking.TASK_TR_EXECUTION_LOGSTREAM)
    if self.execution_log_stream is None:
        dt = datetime.utcnow()
        self.execution_log_stream = LOG_STREAM.format(
            self.task, dt.year, dt.month, dt.day, dt.hour, dt.minute,
            self.action_id)

    # use the flag that was already read with .get so an event without a debug entry
    # does not fail with a KeyError at the very end of the constructor
    debug = self.debug
    self._logger = Logger(logstream=self.execution_log_stream,
                          buffersize=40 if debug else 20,
                          context=context,
                          debug=debug)
def handle_request(self):
    """
    Handles the cloudwatch rule timer event.

    Every task returned by the tracking table as waiting for completion gets its
    "last wait completion" timestamp updated. If there are no such tasks, and the
    handler is not running locally, the completion CloudWatch rule is disabled.

    :return: Dict with the timestamp, running time and number of tasks to check;
        None if a ValueError was raised and logged
    """
    try:
        began = datetime.now()
        checked = 0

        table = TaskTrackingTable(context=self._context, logger=self._logger)

        for checked, item in enumerate(
                table.get_tasks_to_check_for_completion(), start=1):
            item_id = item[handlers.TASK_TR_ID]
            item_name = item.get(handlers.TASK_TR_NAME, None)
            stamp = datetime.now().isoformat()

            # store the time of this check with the task
            table.update_task(
                item_id,
                task=item_name,
                task_metrics=item.get(handlers.TASK_TR_METRICS, False),
                status_data={handlers.TASK_TR_LAST_WAIT_COMPLETION: stamp})

            self._logger.debug("Task is {}", item)
            self._logger.info(INF_SET_COMPLETION_TASK_TIMER, item_name,
                              item_id, stamp)

        elapsed = datetime.now() - began
        running_time = float(elapsed.total_seconds())
        self._logger.info(INF_COMPLETION_ITEMS_SET, running_time, checked)

        # nothing left to check, the timer rule is disabled (not when running locally)
        nothing_to_check = checked == 0
        if nothing_to_check and not handlers.running_local(self._context):
            disabled_rule = handlers.disable_completion_cloudwatch_rule(
                self._context)
            self._logger.info(INF_DISABLED_COMPLETION_TIMER, disabled_rule)

        result = {
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "tasks-to-check": checked
        }
        return safe_dict(result)

    except ValueError as ex:
        self._logger.error(ERR_COMPLETION_HANDLER, ex,
                           safe_json(self._event, indent=2))

    finally:
        self._logger.flush()
def handle_request(self):
    """
    Handles the cloudwatch rule timer event.

    Updates the "last wait completion" timestamp of every task the tracking table
    returns as waiting for completion, and disables the completion CloudWatch rule
    when there are no such tasks.

    :return: Dict with the timestamp, running time and number of tasks to check;
        None if a ValueError was raised and logged
    """
    try:
        began = datetime.now()
        self._logger.info("Handler {}", self.__class__.__name__)

        checked = 0
        table = TaskTrackingTable(context=self._context)

        for checked, item in enumerate(
                table.get_tasks_to_check_for_completion(), start=1):
            item_id = item[tracking.TASK_TR_ID]
            stamp = datetime.now().isoformat()

            # store the time of this check with the action
            table.update_action(
                item_id,
                status_data={tracking.TASK_TR_LAST_WAIT_COMPLETION: stamp})

            self._logger.info(INF_SET_COMPLETION_TASK_TIMER,
                              item[tracking.TASK_TR_NAME], item_id, stamp)

        elapsed = datetime.now() - began
        running_time = float(elapsed.total_seconds())
        self._logger.info(INF_COMPETION_ITEMS_SET, running_time, checked)

        # nothing left to check, the timer rule is disabled
        if checked == 0:
            disabled_rule = handlers.disable_completion_cloudwatch_rule(
                self._context)
            self._logger.info(INF_DISABLED_COMPLETION_TIMER, disabled_rule)

        result = {
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "tasks-to_check": checked
        }
        return safe_dict(result)

    except ValueError as ex:
        event_text = safe_json(self._event, indent=2)
        self._logger.error("{}\n{}".format(ex, event_text))

    finally:
        self._logger.flush()
def tracking_table(self):
    """
    Returns the tracking table instance, creating it on first use and re-using
    that same instance in subsequent calls.

    :return: Instance tracking table
    """
    cached = self._tracking_table
    if cached is not None:
        return cached

    # first use, create the table for the handler's context and keep it
    self._tracking_table = TaskTrackingTable(self._context)
    return self._tracking_table
def _leave_waiting_list(self, task_id, concurrency_key):
    """
    Subtracts 1 from waiting list counter for the specified concurrency key and returns new value.
    If the counter reaches 0 then the entry for the concurrency key is removed.

    When running locally the counter is only read, and an "UPDATE" stream event is
    simulated for the concurrency table instead.

    :param task_id: Id of the task, its item is read from the tracking table first
    :param concurrency_key: Concurrency key for counter
    :return: Updated counter
    """
    # make a consistent read of the task
    self.tracking_table.get_task_item(task_id)

    if handlers.running_local(self._context):
        # local run: read the current counter and simulate the table stream event
        current = self.concurrency_table.get_item_with_retries(
            Key={CONCURRENCY_ID: concurrency_key})
        count = current.get("Item", {}).get(ACTIVE_INSTANCES, 0)

        new_image = {
            "ConcurrencyId": concurrency_key,
            "InstanceCount": count
        }
        old_image = {
            "ConcurrencyId": concurrency_key,
            "InstanceCount": count + 1
        }
        TaskTrackingTable._run_local_stream_event(
            os.getenv(handlers.ENV_CONCURRENCY_TABLE), "UPDATE", new_image,
            old_image, self._context)
        return count

    # decrement the counter and flag the key so the next waiting item can run
    updated = self.concurrency_table.update_item_with_retries(
        Key={CONCURRENCY_ID: concurrency_key},
        UpdateExpression="ADD InstanceCount :min_one SET RunNext=:run",
        ExpressionAttributeValues={
            ":min_one": -1,
            ":run": True
        },
        ReturnValues="UPDATED_NEW")

    remaining = int(updated["Attributes"].get(ACTIVE_INSTANCES, 0))
    count = max(0, remaining)

    # remove entry if no more waiting items for this key
    if count == 0:
        self.concurrency_table.delete_item_with_retries(
            Key={CONCURRENCY_ID: concurrency_key})

    return count
def __init__(self, event, context, logger=None, tracking_store=None):
    """
    Initializes the handler from the select-resources event.

    :param event: Event to handle, must contain the task and the task datetime
    :param context: Lambda context, passed to the logger and the tracking table
    :param logger: Optional logger; a QueuedLogger is created if None
    :param tracking_store: Optional store for the created task actions; a
        TaskTrackingTable is created if None
    """

    def log_stream_name():
        """
        Builds the name of the log stream from the class name, the task name, an
        optional account/region part and the current UTC date.
        """
        classname = self.__class__.__name__
        dt = datetime.utcnow()

        select_args = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS, {})
        account = select_args.get(handlers.HANDLER_EVENT_ACCOUNT, "")
        regions = select_args.get(handlers.HANDLER_EVENT_REGIONS, [])

        if account is not None and len(regions) > 0:
            # account and region(s) were passed in by the event
            account_and_region = "-".join([account, regions[0]]) + "-"
        else:
            region = ""

            if self.sub_task is not None:
                # only use an account/region in the name if the sub task handles exactly one
                account = ""
                if self._this_account:
                    if len(self._accounts) == 0:
                        account = os.getenv(
                            handlers.ENV_OPS_AUTOMATOR_ACCOUNT)
                elif len(self._accounts) == 1:
                    account = self._accounts[0]

                region = self._regions[0] if len(
                    self._regions) == 1 else ""

            # a missing (None) account is treated like an empty one, so it neither
            # breaks the join nor ends up as the text "None" in the stream name
            if account not in ("", None):
                if region not in ["", None]:
                    account_and_region = "-".join([account, region]) + "-"
                else:
                    account_and_region = account
            else:
                account_and_region = ""

        return LOG_STREAM.format(classname, self.task[handlers.TASK_NAME],
                                 account_and_region, dt.year, dt.month,
                                 dt.day)

    self._context = context
    self._event = event
    self.task = event[handlers.HANDLER_EVENT_TASK]
    self.sub_task = event.get(handlers.HANDLER_EVENT_SUB_TASK, None)
    self.use_custom_select = event.get(
        handlers.HANDLER_EVENT_CUSTOM_SELECT, True)

    # the job id is used to correlate all generated tasks for the selected resources
    self.task_group = self._event.get(handlers.HANDLER_EVENT_TASK_GROUP,
                                      None)
    if self.task_group is None:
        self.task_group = str(uuid.uuid4())

    debug = self.task.get(handlers.TASK_DEBUG, False)
    if logger is None:
        self._logger = QueuedLogger(logstream=log_stream_name(),
                                    context=context,
                                    buffersize=50 if debug else 20,
                                    debug=debug)
    else:
        self._logger = logger

    # sts client is created on first use
    self._sts = None

    self.select_args = event.get(handlers.HANDLER_SELECT_ARGUMENTS, {})
    self.task_dt = event[handlers.HANDLER_EVENT_TASK_DT]

    self.action_properties = actions.get_action_properties(
        self.task[handlers.TASK_ACTION])
    self.action_class = actions.get_action_class(
        self.task[handlers.TASK_ACTION])
    self.task_parameters = self.task.get(handlers.TASK_PARAMETERS, {})
    self.metrics = self.task.get(handlers.TASK_METRICS, False)

    self.service = self.action_properties[actions.ACTION_SERVICE]
    self.keep_tags = self.action_properties.get(
        actions.ACTION_KEEP_RESOURCE_TAGS, True)

    self.source = self._event.get(handlers.HANDLER_EVENT_SOURCE,
                                  handlers.UNKNOWN_SOURCE)
    self.run_local = handlers.running_local(self._context)

    # timer and event used to detect that selecting/storing is running out of time
    self._timer = None
    self._timeout_event = threading.Event()

    # aggregation level and batch size can be functions of the task parameters
    self.aggregation_level = self.action_properties.get(
        actions.ACTION_AGGREGATION, actions.ACTION_AGGREGATION_RESOURCE)
    if isinstance(self.aggregation_level, types.FunctionType):
        self.aggregation_level = self.aggregation_level(
            self.task_parameters)

    self.batch_size = self.action_properties.get(actions.ACTION_BATCH_SIZE)
    if isinstance(self.batch_size, types.FunctionType):
        self.batch_size = self.batch_size(self.task_parameters)

    if tracking_store is None:
        self.actions_tracking = TaskTrackingTable(self._context,
                                                  logger=self._logger)
    else:
        self.actions_tracking = tracking_store
class SelectResourcesHandler(object):
    """
    Class that handles the selection of AWS service resources for a task to perform its action on.
    """

    def __init__(self, event, context, logger=None, tracking_store=None):
        """
        Initializes the handler from the select-resources event.

        :param event: Event to handle, must contain the task and the task datetime
        :param context: Lambda context, passed to the logger and the tracking table
        :param logger: Optional logger; a QueuedLogger is created if None
        :param tracking_store: Optional store for the created task actions; a
            TaskTrackingTable is created if None
        """

        def log_stream_name():
            """
            Builds the name of the log stream from the class name, the task name, an
            optional account/region part and the current UTC date.
            """
            classname = self.__class__.__name__
            dt = datetime.utcnow()

            select_args = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS, {})
            account = select_args.get(handlers.HANDLER_EVENT_ACCOUNT, "")
            regions = select_args.get(handlers.HANDLER_EVENT_REGIONS, [])

            if account is not None and len(regions) > 0:
                # account and region(s) were passed in by the event
                account_and_region = "-".join([account, regions[0]]) + "-"
            else:
                region = ""

                if self.sub_task is not None:
                    # only use an account/region in the name if the sub task handles exactly one
                    account = ""
                    if self._this_account:
                        if len(self._accounts) == 0:
                            account = os.getenv(
                                handlers.ENV_OPS_AUTOMATOR_ACCOUNT)
                    elif len(self._accounts) == 1:
                        account = self._accounts[0]

                    region = self._regions[0] if len(
                        self._regions) == 1 else ""

                # a missing (None) account is treated like an empty one, so it neither
                # breaks the join nor ends up as the text "None" in the stream name
                if account not in ("", None):
                    if region not in ["", None]:
                        account_and_region = "-".join([account, region]) + "-"
                    else:
                        account_and_region = account
                else:
                    account_and_region = ""

            return LOG_STREAM.format(classname, self.task[handlers.TASK_NAME],
                                     account_and_region, dt.year, dt.month,
                                     dt.day)

        self._context = context
        self._event = event
        self.task = event[handlers.HANDLER_EVENT_TASK]
        self.sub_task = event.get(handlers.HANDLER_EVENT_SUB_TASK, None)
        self.use_custom_select = event.get(
            handlers.HANDLER_EVENT_CUSTOM_SELECT, True)

        # the job id is used to correlate all generated tasks for the selected resources
        self.task_group = self._event.get(handlers.HANDLER_EVENT_TASK_GROUP,
                                          None)
        if self.task_group is None:
            self.task_group = str(uuid.uuid4())

        debug = self.task.get(handlers.TASK_DEBUG, False)
        if logger is None:
            self._logger = QueuedLogger(logstream=log_stream_name(),
                                        context=context,
                                        buffersize=50 if debug else 20,
                                        debug=debug)
        else:
            self._logger = logger

        # sts client is created on first use
        self._sts = None

        self.select_args = event.get(handlers.HANDLER_SELECT_ARGUMENTS, {})
        self.task_dt = event[handlers.HANDLER_EVENT_TASK_DT]

        self.action_properties = actions.get_action_properties(
            self.task[handlers.TASK_ACTION])
        self.action_class = actions.get_action_class(
            self.task[handlers.TASK_ACTION])
        self.task_parameters = self.task.get(handlers.TASK_PARAMETERS, {})
        self.metrics = self.task.get(handlers.TASK_METRICS, False)

        self.service = self.action_properties[actions.ACTION_SERVICE]
        self.keep_tags = self.action_properties.get(
            actions.ACTION_KEEP_RESOURCE_TAGS, True)

        self.source = self._event.get(handlers.HANDLER_EVENT_SOURCE,
                                      handlers.UNKNOWN_SOURCE)
        self.run_local = handlers.running_local(self._context)

        # timer and event used to detect that selecting/storing is running out of time
        self._timer = None
        self._timeout_event = threading.Event()

        # aggregation level and batch size can be functions of the task parameters
        self.aggregation_level = self.action_properties.get(
            actions.ACTION_AGGREGATION, actions.ACTION_AGGREGATION_RESOURCE)
        if isinstance(self.aggregation_level, types.FunctionType):
            self.aggregation_level = self.aggregation_level(
                self.task_parameters)

        self.batch_size = self.action_properties.get(actions.ACTION_BATCH_SIZE)
        if isinstance(self.batch_size, types.FunctionType):
            self.batch_size = self.batch_size(self.task_parameters)

        if tracking_store is None:
            self.actions_tracking = TaskTrackingTable(self._context,
                                                      logger=self._logger)
        else:
            self.actions_tracking = tracking_store

    @classmethod
    def is_handling_request(cls, event, _):
        """
        Tests if this handler handles the event.

        :param event: The event to test
        :param _: Not used
        :return: True if the event is handled by this handler
        """
        return event.get(handlers.HANDLER_EVENT_ACTION,
                         "") == handlers.HANDLER_ACTION_SELECT_RESOURCES

    @property
    def _task_tag(self):
        """
        Returns the name of the tag that contains the list of actions for a resource.

        :return: The name of the tag that contains the list of actions for a resource
        """
        name = os.environ.get(handlers.ENV_AUTOMATOR_TAG_NAME)
        if name is None:
            name = handlers.DEFAULT_SCHEDULER_TAG
        return name

    @property
    def sts(self):
        """
        Returns the sts client, it is created on first use.

        :return: boto3 sts client
        """
        if self._sts is None:
            self._sts = boto3.client("sts")
        return self._sts

    @property
    def _resource_name(self):
        """
        Returns the name of the selected resources; taken from the action properties,
        or from the select arguments of the event if the action does not specify one.

        :return: Name of the resources, "" if not specified by action or event
        """
        name = self.action_properties[actions.ACTION_RESOURCES]
        if name in [None, ""]:
            name = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS, {}).get(
                handlers.HANDLER_EVENT_RESOURCE_NAME, "")
        return name

    def _check_can_execute(self, selected_resources):
        """
        Checks if the action for the task can be executed with the selected resources.

        The check is done by the optional check method of the action class, which
        signals that it can not execute by raising a ValueError.

        :param selected_resources: Resources to test
        :return: False if the check method raised a ValueError, else True
        """
        check_method = getattr(self.action_class, actions.CHECK_CAN_EXECUTE,
                               None)
        if check_method:
            try:
                check_method(selected_resources, self.task_parameters)
                return True
            except ValueError as ex:
                self._logger.error(ERR_CAN_NOT_EXECUTE_WITH_THESE_RESOURCES,
                                   self.task[handlers.TASK_ACTION],
                                   self.task[handlers.TASK_NAME], str(ex))
                return False
        return True

    def _task_assumed_roles(self):
        """
        Generator for the roles of the accounts handled by the task.

        :return: Yields a role for each handled account; None can be yielded if
            there is no role for an account
        """
        # account can optionally be passed in by events
        account = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                  {}).get(handlers.HANDLER_EVENT_ACCOUNT)

        if account is not None:
            assumed_role = handlers.get_account_role(account=account,
                                                     task=self.task,
                                                     logger=self._logger)
            if assumed_role is None:
                # only an error if it is not the account of the Ops Automator itself
                if account != os.getenv(handlers.ENV_OPS_AUTOMATOR_ACCOUNT):
                    self._logger.error(ERR_ACCOUNT_SKIPPED_NO_ROLE, account)
                yield None
            else:
                yield assumed_role
        else:
            # no role if processing scheduled task in own account
            if self._this_account:
                assumed_role = handlers.get_account_role(
                    account=os.getenv(handlers.ENV_OPS_AUTOMATOR_ACCOUNT),
                    task=self.task,
                    logger=self._logger)
                yield assumed_role

            for acct in self._accounts:
                # for external accounts
                assumed_role = handlers.get_account_role(account=acct,
                                                         task=self.task,
                                                         logger=self._logger)
                if assumed_role is not None:
                    yield assumed_role

    @property
    def _this_account(self):
        """
        Returns the "this account" setting of the sub task if there is one, else of the task.

        :return: "This account" setting, defaults to True for a task without the setting
        """
        if self.sub_task is not None:
            return self.sub_task[handlers.TASK_THIS_ACCOUNT]
        return self.task.get(handlers.TASK_THIS_ACCOUNT, True)

    @property
    def _accounts(self):
        """
        Returns the accounts of the sub task if there is one, else of the task.

        :return: List of accounts, empty list for a task without accounts
        """
        if self.sub_task is not None:
            return self.sub_task[handlers.TASK_ACCOUNTS]
        return self.task.get(handlers.TASK_ACCOUNTS, [])

    @staticmethod
    def _filter_event_regions(event_regions, configured_regions):
        """
        Returns the regions from the event that are in the configured regions.

        :param event_regions: Regions passed in by the event
        :param configured_regions: Regions of the task configuration, can be empty or None
        :return: List of event regions that are configured; all event regions if no
            regions (other than None) are configured, as there is nothing to check against
        """
        allowed = set(r for r in (configured_regions or []) if r is not None)
        if not allowed:
            return list(event_regions)
        return [r for r in event_regions if r in allowed]

    @property
    def _regions(self):
        """
        Returns the regions in where resources are selected.

        Regions passed in by the event are used if present, restricted to the regions
        of the task configuration; else the regions of the sub task or task are used.

        :return: List of regions; [None] if the used list of regions is empty
        """
        regions = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS,
                                  {}).get(handlers.HANDLER_EVENT_REGIONS)

        if regions is None:
            if self.sub_task is not None:
                regions = self.sub_task[handlers.TASK_REGIONS]
            else:
                regions = self.task.get(handlers.TASK_REGIONS, [None])
        else:
            # check if the regions in the event are in the task configurations regions
            # (this used to compare the event regions with themselves, which never filtered)
            configuration = self.sub_task if self.sub_task is not None else self.task
            checked_regions = self._filter_event_regions(
                regions, configuration.get(handlers.TASK_REGIONS, []))

            if len(checked_regions) != len(regions):
                # the property is also read while building the log stream name in the
                # constructor, at that time there is no logger yet
                logger = getattr(self, "_logger", None)
                if logger is not None:
                    logger.warning(WARN_REGION_NOT_IN_TASK_CONFIGURATION,
                                   self._event)
                return checked_regions

        return regions if len(regions) > 0 else [None]

    def handle_request(self):
        """
        Handles the select resources request. Creates new actions for resources found for a task.

        :return: Results of handling the request; dict with timestamp, running time
            and the dispatched task items
        """

        def filter_by_action_filter(srv, used_role, r):
            """
            Passes the resource to the optional select-and-process method of the action class.

            :return: The resource as returned by that method (None if it was filtered
                out), or the unchanged resource if the action has no such method
            """
            filter_method = getattr(
                self.action_class, actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                None)
            if filter_method is not None:
                r = filter_method(srv, self._logger, self._resource_name, r,
                                  self._context, self.task, used_role)
                if r is None:
                    self._logger.debug(
                        DEBUG_FILTER_METHOD, self.action_class.__name__,
                        actions.SELECT_AND_PROCESS_RESOURCE_METHOD)
                    return None
                self._logger.debug(
                    DEBUG_FILTERED_RESOURCE, self.action_class.__name__,
                    actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                    safe_json(r, indent=3))
            return r

        def is_selected_resource(aws_service, resource, used_role, taskname,
                                 tags_filter, does_resource_supports_tags):
            """
            Tests if the resource is selected for the task by its tags and the
            optional filter method of the action.

            :return: The (possibly processed) resource if selected, else None
            """
            # No tags then just use filter method if any
            if not does_resource_supports_tags:
                self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
                return filter_by_action_filter(srv=aws_service,
                                               used_role=used_role,
                                               r=resource)

            tags = resource.get("Tags", {})

            # name of the tag that holds the list of tasks for this resource
            tagname = self._task_tag

            if tags_filter is None:
                # test if name of the task is in list of tasks in tag value
                if (tagname not in tags) or (
                        taskname not in tagging.split_task_list(tags[tagname])):
                    self._logger.debug(
                        DEBUG_RESOURCE_NOT_SELECTED,
                        safe_json(resource, indent=2), taskname, ','.join(
                            ["'{}'='{}'".format(t, tags[t]) for t in tags]))
                    return None
                self._logger.debug(DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                                   safe_json(resource, indent=2), tagname,
                                   taskname)
            elif tags_filter != tagging.tag_filter_set.WILDCARD_CHAR:
                # using a tag filter, test if there are any tags matching the tag filter
                if not TagFilterExpression(tags_filter).is_match(tags):
                    self._logger.debug(
                        DEBUG_RESOURCE_NOT_SELECTED_TAG_FILTER,
                        safe_json(resource, indent=2), taskname, ','.join(
                            ["'{}'='{}'".format(t, tags[t]) for t in tags]))
                    return None
                self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                   safe_json(resource, indent=2), tags,
                                   tags_filter, taskname)
            else:
                # * means any tag
                self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                                   safe_json(resource, indent=2), taskname)

            return filter_by_action_filter(srv=aws_service,
                                           used_role=used_role,
                                           r=resource)

        def resource_batches(resources):
            """
            Returns resources as chunks of size items. If the class has an optional custom aggregation function then the
            resources are aggregated first using this function before applying the batch size

            :param resources: resources to process
            :return: Generator for blocks of resource items
            """
            aggregate_func = getattr(self.action_class,
                                     actions.CUSTOM_AGGREGATE_METHOD, None)
            if aggregate_func is not None:
                aggregated = aggregate_func(resources, self.task_parameters,
                                            self._logger)
            else:
                aggregated = [resources]

            for i in aggregated:
                if self.batch_size is None:
                    yield i
                else:
                    first = 0
                    while first < len(i):
                        yield i[first:first + self.batch_size]
                        first += self.batch_size

        def setup_tag_filtering(t_name):
            """
            Gets the tag filter for the task and the single tag to select on, if any.

            :param t_name: Name of the task, used for logging
            :return: Tuple (name of single tag to select on or None, tag filter string or None)
            """
            # get optional tag filter
            no_select_by_tags = self.action_properties.get(
                actions.ACTION_NO_TAG_SELECT, False)
            if no_select_by_tags:
                tag_filter_string = tagging.tag_filter_set.WILDCARD_CHAR
            else:
                tag_filter_string = self.task.get(handlers.TASK_TAG_FILTER)

            # set if only a single task is required for selecting the resources, it is used to optimise the select
            select_tag = None
            if tag_filter_string is None:
                self._logger.debug(DEBUG_SELECT_BY_TASK_NAME,
                                   self._resource_name, self._task_tag, t_name)
                select_tag = self._task_tag
            elif tag_filter_string == tagging.tag_filter_set.WILDCARD_CHAR:
                self._logger.debug(DEBUG_SELECT_ALL_RESOURCES,
                                   self._resource_name)
            else:
                self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES,
                                   self._resource_name)
                # build the tag expression that is used to filter the resources
                tag_filter_expression = TagFilterExpression(tag_filter_string)
                # the keys of the used tags
                tag_filter_expression_tag_keys = list(
                    tag_filter_expression.get_filter_keys())
                # if there is only a single tag then we can optimize by just filtering on that specific tag
                if len(tag_filter_expression_tag_keys) == 1 and \
                        tagging.tag_filter_set.WILDCARD_CHAR not in tag_filter_expression_tag_keys[0]:
                    select_tag = tag_filter_expression_tag_keys[0]

            return select_tag, tag_filter_string

        def add_aggregated(aggregated_resources):
            """
            Generator that adds and yields a task action for each batch of the aggregated resources.
            """
            # create tasks action for aggregated resources , optionally split in batch size chunks
            for ra in resource_batches(aggregated_resources):
                if self._check_can_execute(ra):
                    # assumed_role is the role of the account loop at the time of the call
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ra,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADDED_AGGREGATED_RESOURCES_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       len(ra), self._resource_name,
                                       self.task[handlers.TASK_NAME])
                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))
                    yield action_item

        def add_as_individual(resources):
            """
            Generator that adds and yields a task action for each individual resource.
            """
            for ri in resources:
                # task action for each selected resource
                if self._check_can_execute([ri]):
                    # assumed_role is the role of the account loop at the time of the call
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ri,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADD_SINGLE_RESOURCE_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       self._resource_name,
                                       self.task[handlers.TASK_NAME])
                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))
                    yield action_item

        try:
            task_items = []
            start = datetime.now()

            self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=3))
            self._logger.debug(DEBUG_ACTION,
                               safe_json(self.action_properties, indent=3))

            self._logger.info(INFO_SELECTED_RESOURCES, self._resource_name,
                              self.service, self.task[handlers.TASK_NAME])
            self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

            task_level_aggregated_resources = []

            args = self._build_describe_argument()

            # tags are only used for selecting if the resource supports them
            service_resource_with_tags = services.create_service(
                self.service).resources_with_tags
            if self._resource_name == "":
                supports_tags = len(service_resource_with_tags) != 0
            else:
                supports_tags = self._resource_name.lower() in [
                    r.lower() for r in service_resource_with_tags
                ]

            args["tags"] = supports_tags
            self._logger.info(INFO_USE_TAGS_TO_SELECT,
                              "R" if supports_tags else "No r")

            task_name = self.task[handlers.TASK_NAME]
            count_resource_items = 0
            selected_resource_items = 0

            select_on_tag, tag_filter_str = setup_tag_filtering(task_name)

            filter_func = getattr(self.action_class,
                                  actions.FILTER_RESOURCE_METHOD, None)

            # timer to guard selection time and log warning if getting close to lambda timeout
            if self._context is not None:
                self.start_timer(REMAINING_TIME_AFTER_DESCRIBE)

            try:
                for assumed_role in self._task_assumed_roles():
                    retry_strategy = get_default_retry_strategy(
                        service=self.service, context=self._context)

                    service = services.create_service(
                        service_name=self.service,
                        service_retry_strategy=retry_strategy,
                        role_arn=assumed_role)

                    if self.is_timed_out():
                        break

                    # contains resources for account
                    account_level_aggregated_resources = []

                    self._logger.info(INFO_ACCOUNT, service.aws_account)
                    if assumed_role not in [None, ""]:
                        self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                    for region in self._regions:

                        # test for timeouts
                        if self.is_timed_out():
                            break

                        # handle region passed in the event
                        if region is not None:
                            args["region"] = region
                        else:
                            if "region" in args:
                                del args["region"]

                        # resources can be passed in the event by event handlers
                        all_resources = self._event.get(
                            handlers.HANDLER_SELECT_RESOURCES, None)

                        if all_resources is None:
                            # actions can have an optional method to select resources
                            action_custom_describe_function = getattr(
                                self.action_class, "describe_resources", None)
                            if action_custom_describe_function is not None and self.use_custom_select:
                                all_resources = action_custom_describe_function(
                                    service, self.task, region)
                            else:
                                # select resources from the service
                                self._logger.debug(DEBUG_SELECT_PARAMETERS,
                                                   self._resource_name,
                                                   self.service, args)
                                # selecting a list of all resources in this account/region
                                all_resources = list(
                                    service.describe(
                                        self._resource_name,
                                        filter_func=filter_func,
                                        select_on_tag=select_on_tag,
                                        **args))

                            # test for timeout
                            if self.is_timed_out():
                                break

                            count_resource_items += len(all_resources)

                        self._logger.info(INFO_RESOURCES_FOUND,
                                          len(all_resources))

                        # select resources that are processed by the task
                        selected_resources = []
                        for sr in all_resources:
                            sel = is_selected_resource(
                                aws_service=service,
                                resource=sr,
                                used_role=assumed_role,
                                taskname=task_name,
                                tags_filter=tag_filter_str,
                                does_resource_supports_tags=supports_tags)
                            if sel is not None:
                                selected_resources.append(sel)

                        selected_resource_items += len(selected_resources)

                        # display found and selected resources
                        if len(all_resources) > 0:
                            self._logger.info(INFO_RESOURCES_SELECTED,
                                              len(selected_resources))
                            if len(selected_resources) == 0:
                                continue

                        # delete tags if not needed by the action
                        if not self.keep_tags:
                            for res in selected_resources:
                                if "Tags" in res:
                                    del res["Tags"]

                        # add resources to total list of resources for this task
                        if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                            task_level_aggregated_resources += selected_resources

                        # add resources to list of resources for this account
                        if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                            account_level_aggregated_resources += selected_resources

                        # add batch(es) of resources for this region
                        if self.aggregation_level == actions.ACTION_AGGREGATION_REGION and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_aggregated(selected_resources))

                        # no aggregation, add each individual resource
                        if self.aggregation_level == actions.ACTION_AGGREGATION_RESOURCE and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_as_individual(selected_resources))

                    # at the end of the region loop, check if aggregated resources for account need to be added
                    if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT and len(
                            account_level_aggregated_resources) > 0:
                        task_items += list(
                            add_aggregated(account_level_aggregated_resources))

                # at the end of the accounts loop, check if aggregated resources for task need to be added
                if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and len(
                        task_level_aggregated_resources) > 0:
                    task_items += list(
                        add_aggregated(task_level_aggregated_resources))

            except Exception as ex:
                raise_exception(ERR_SELECTING_TASK_RESOURCES,
                                self.task[handlers.TASK_NAME], ex)

            finally:
                if self._timer is not None:
                    # cancel timer used to avoid timeouts when selecting resources
                    self._timer.cancel()
                    if self.is_timed_out():
                        raise_exception(ERR_TIMEOUT_SELECTING_RESOURCES,
                                        self._resource_name, self.service,
                                        task_name)

                    # new timer to guard the time used for storing the task actions
                    self.start_timer(REMAINING_TIME_AFTER_STORE)
                    self.actions_tracking.flush(self._timeout_event)
                    if self.is_timed_out():
                        raise_exception(
                            ERR_CREATING_TASKS_FOR_SELECTED_RESOURCES,
                            task_name)
                    self._timer.cancel()
                else:
                    self.actions_tracking.flush()

            self._logger.info(INFO_ADDED_ITEMS, len(task_items),
                              self.task[handlers.TASK_NAME])

            running_time = float((datetime.now() - start).total_seconds())
            self._logger.info(INFO_RESULT, running_time)

            if self.metrics:
                put_task_select_data(task_name=task_name,
                                     items=count_resource_items,
                                     selected_items=selected_resource_items,
                                     logger=self._logger,
                                     selection_time=running_time)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": running_time,
                "dispatched-tasks": task_items
            })

        finally:
            self._logger.flush()

    def select_timed_out(self):
        """
        Function is called when the handling of the request times out.
        Logs the error, sets the timeout event and cancels the timer.

        :return:
        """
        time_used = int(os.getenv(handlers.ENV_LAMBDA_TIMEOUT, 900)) - int(
            (self._context.get_remaining_time_in_millis() / 1000))
        self._logger.error(ERR_TIMEOUT_SELECT_OR_STORE, time_used,
                           self.task[handlers.TASK_NAME])
        self._timeout_event.set()
        self._logger.flush()
        self._timer.cancel()

    def start_timer(self, remaining):
        """
        Starts the timer that calls select_timed_out.

        :param remaining: Number of seconds that must be left of the remaining
            execution time of the context when the timer fires
        :return:
        """
        execution_time_left = (self._context.get_remaining_time_in_millis() /
                               1000.00) - remaining
        self._timer = threading.Timer(execution_time_left,
                                      self.select_timed_out)
        self._timer.start()

    def is_timed_out(self):
        """
        Tests if the timeout event is set.

        :return: True if there is a timeout event and it is set
        """
        return self._timeout_event is not None and self._timeout_event.is_set()

    def _build_describe_argument(self):
        """
        Build the argument for the describe call that selects the resources

        :return: arguments for describe call
        """
        args = {}

        # get the mapping for parameters that should be used as parameters to the describe method call to select the resources
        action_parameters = self.action_properties.get(
            actions.ACTION_PARAMETERS, {})
        for p in action_parameters:
            describe_parameter = action_parameters[p].get(
                actions.PARAM_DESCRIBE_PARAMETER)
            if describe_parameter is None:
                continue
            if self.task_parameters.get(p) is not None:
                args[describe_parameter] = self.task_parameters[p]

        # also add describe method parameters specified as select parameters in the metadata of the action
        select_parameters = self.action_properties.get(
            actions.ACTION_SELECT_PARAMETERS, {})
        if isinstance(select_parameters, types.FunctionType):
            select_parameters = select_parameters(self.task,
                                                  self.task_parameters)
        for p in select_parameters:
            args[p] = select_parameters[p]

        # region and account are separate describe parameters
        args.update({
            a: self.select_args[a]
            for a in self.select_args if a not in [
                handlers.HANDLER_EVENT_REGIONS,
                handlers.HANDLER_EVENT_ACCOUNT,
                handlers.HANDLER_EVENT_RESOURCE_NAME
            ]
        })

        # action specified select jmes-path expression for resources
        if actions.ACTION_SELECT_EXPRESSION in self.action_properties:
            # replace parameter placeholders with values. We can't use str.format here as the jmespath expression
            # may contain {} as well for projection of attributes, so placeholders for parameter names are in
            # the format %paramname%
            jmes = self.action_properties[actions.ACTION_SELECT_EXPRESSION]
            for p in self.task_parameters:
                jmes = jmes.replace("%{}%".format(p),
                                    str(self.task_parameters[p]))
            args["select"] = jmes

        return args
class ExecutionHandler:
    """
    Class to handle events that execute an action on resources, or that test
    whether a previously started action has completed.
    """

    def __init__(self, event, context):
        """
        Initializes handler.

        :param event: Event to handle, holds the task tracking item attributes
        :param context: Context if run within Lambda environment
        """
        self._context = context
        self._event = event
        self._action_tracking = TaskTrackingTable(context)

        self.action_id = self._event[tracking.TASK_TR_ID]
        self.task = self._event[tracking.TASK_TR_NAME]
        self.action = self._event[tracking.TASK_TR_ACTION]

        # resolve the action class once, it is also used to look up the optional completion method
        self.action_class = actions.get_action_class(self.action)
        self.test_completion_method = getattr(self.action_class,
                                              handlers.COMPLETION_METHOD, None)

        # parameters and resources are passed as JSON strings in the event
        self.action_parameters = json.loads(
            self._event.get(tracking.TASK_TR_PARAMETERS, "{}"))
        self.action_resources = json.loads(
            self._event.get(tracking.TASK_TR_RESOURCES, "{}"))

        self.dryrun = self._event.get(tracking.TASK_TR_DRYRUN)
        self.debug = self._event.get(tracking.TASK_TR_DEBUG)
        self.started_at = float(self._event.get(tracking.TASK_TR_STARTED_TS, 0))
        self.start_result = self._event.get(tracking.TASK_TR_START_RESULT, None)
        self.session = AwsService.get_session(
            self._event.get(tracking.TASK_TR_ASSUMED_ROLE))

        self.stack_name = os.getenv(handlers.ENV_STACK_NAME)
        self.stack_id = os.getenv(handlers.ENV_STACK_ID)

        self.action_properties = actions.get_action_properties(self.action)
        # filled on first access of the stack_resources property
        self._stack_resources = None
        self.timeout = self._event.get(tracking.TASK_TR_TIMEOUT)

        # setup logging, build a new log stream name if the event does not carry one
        self.execution_log_stream = self._event.get(
            tracking.TASK_TR_EXECUTION_LOGSTREAM)
        if self.execution_log_stream is None:
            dt = datetime.utcnow()
            self.execution_log_stream = LOG_STREAM.format(
                self.task, dt.year, dt.month, dt.day, dt.hour, dt.minute,
                self.action_id)

        # use the value already read with .get() so a missing debug attribute does not raise
        debug = self.debug
        self._logger = Logger(logstream=self.execution_log_stream,
                              buffersize=40 if debug else 20,
                              context=context,
                              debug=debug)

    @staticmethod
    def is_handling_request(event):
        """
        Tests if event is handled by this handler.

        :param event: Tested event
        :return: True if the event is handled by this handler
        """
        return event.get(handlers.HANDLER_EVENT_ACTION, "") in [
            handlers.HANDLER_ACTION_EXECUTE,
            handlers.HANDLER_ACTION_TEST_COMPLETION
        ]

    @property
    def stack_resources(self):
        """
        Reads the action stack resources. The result is read once and cached.

        :return: Stack resources for the action, keyed by the resource name from the action properties
        """
        if self._stack_resources is None:
            self._stack_resources = {}

            # test if this action has additional stack resources
            resources = self.action_properties.get(
                actions.ACTION_PARAM_STACK_RESOURCES, {})
            if resources:
                # name of the class
                class_name = self.action_properties[actions.ACTION_CLASS_NAME]
                # actual resource names is name of class + name from class properties
                logical_resource_names = [
                    class_name + resource_name for resource_name in resources
                ]

                cfn = get_client_with_retries("cloudformation",
                                              ["list_stack_resources"],
                                              context=self._context)
                args = {"StackName": self.stack_id}

                while True:
                    # get the stack resources
                    cfn_resp = cfn.list_stack_resources_with_retries(**args)
                    for res in cfn_resp.get("StackResourceSummaries", []):
                        # actual name
                        logical_resource_id = res["LogicalResourceId"]
                        # test if this resource is a resource from the action properties
                        if logical_resource_id in logical_resource_names:
                            # strip the class name prefix to get the name used in the action properties
                            self._stack_resources[
                                logical_resource_id[len(class_name):]] = {
                                    i: res[i]
                                    for i in [
                                        "LogicalResourceId",
                                        "PhysicalResourceId", "ResourceType"
                                    ]
                                }
                        # test if we've found the number of resources that we declared,
                        # in that case no need to read more
                        if len(self._stack_resources) == len(resources):
                            return self._stack_resources

                    # continuation if the response is paged
                    if "NextToken" in cfn_resp:
                        args["NextToken"] = cfn_resp["NextToken"]
                    else:
                        break

        return self._stack_resources

    def _handle_task_execution(self, action_instance, args):
        """
        Executes the action and records the outcome in the task tracking table.

        :param action_instance: Instance of the action class to execute
        :param args: Arguments passed to the execute method of the action
        :return: Dictionary with information about the executed action
        """

        def handle_metrics(result):
            """
            Sends metrics data found in the action result, if sending metrics is allowed.

            :param result: Result of the action, a dictionary or a JSON string
            """
            metrics_enabled = allow_send_metrics()
            self._logger.info("Sending metrics data is {}",
                              "enabled" if metrics_enabled else "disabled")
            if not metrics_enabled:
                return
            # sending metrics is best effort, a failure must not fail the action
            try:
                result_data = result if isinstance(
                    result, dict) else json.loads(result)
                if actions.METRICS_DATA in result_data:
                    send_metrics_data(
                        metrics=result_data[actions.METRICS_DATA],
                        logger=self._logger)
            except Exception as ex:
                self._logger.warning(
                    "Error processing or sending metrics data ({})", str(ex))

        self._logger.info(INFO_ACTION, self.action, self.action_id,
                          "in dry-run mode " if self.dryrun else "", self.task,
                          json.dumps(self.action_parameters, indent=2))
        # the context is optional (see the check before enabling the completion rule below)
        if self._context is not None:
            self._logger.info(INFO_LAMBDA_MEMORY, self._context.function_name,
                              self._context.memory_limit_in_mb)

        self._action_tracking.update_action(self.action_id,
                                            status=tracking.STATUS_STARTED)

        start = time()

        return_data = {
            "task": self.task,
            "action": self.action,
            "id": self.action_id,
            "dryrun": self.dryrun,
        }

        action_result = action_instance.execute(args)

        # no metrics for actions that are marked as internal
        if not action_instance.properties.get(actions.ACTION_INTERNAL, False):
            handle_metrics(action_result)

        execution_time = round(float((time() - start)), 3)

        if self.test_completion_method is None or self.dryrun:
            # nothing to wait for, the action is completed
            self._action_tracking.update_action(
                action_id=self.action_id,
                status=tracking.STATUS_COMPLETED,
                status_data={
                    tracking.TASK_TR_STARTED_TS: datetime.now().isoformat(),
                    tracking.TASK_TR_RESULT: str(action_result),
                    tracking.TASK_TR_EXECUTION_TIME: str(execution_time),
                    tracking.TASK_TR_EXECUTION_LOGSTREAM: self.execution_log_stream
                })
            self._logger.info(INFO_ACTION_RESULT, execution_time,
                              str(action_result))
        else:
            # the action has a method for testing completion of the task, set the status to waiting and
            # store the result of the execution that started the action as start result that will be
            # passed to the completion method
            self._action_tracking.update_action(
                action_id=self.action_id,
                status=tracking.STATUS_WAIT_FOR_COMPLETION,
                status_data={
                    tracking.TASK_TR_LAST_WAIT_COMPLETION: datetime.now().isoformat(),
                    tracking.TASK_TR_STARTED_TS: int(start),
                    tracking.TASK_TR_START_RESULT: str(action_result),
                    tracking.TASK_TR_START_EXECUTION_TIME: str(execution_time),
                    tracking.TASK_TR_EXECUTION_LOGSTREAM: self.execution_log_stream
                })
            self._logger.info(INFO_STARTED_AND_WAITING_FOR_COMPLETION,
                              str(action_result))

            if self._context is not None:
                rule = handlers.enable_completion_cloudwatch_rule(
                    self._context)
                self._logger.info(INFO_RULE_ENABLED, rule)

        # no exception from action
        return_data.update({
            "result": tracking.STATUS_COMPLETED,
            "action-result": str(action_result),
            "datetime": datetime.now().isoformat(),
            "running-time": execution_time
        })

        return safe_dict(return_data)

    def _handle_test_task_completion(self, action_instance, arguments):
        """
        Tests if a started action has completed and records completion or timeout
        in the task tracking table.

        :param action_instance: Instance of the action class
        :param arguments: Arguments passed to the is_completed method of the action
        :return: Dictionary with the status of the tested action
        """
        self._logger.info(INFO_CHECK_TASK_COMPLETION, self.action, self.task,
                          self.action_id,
                          json.dumps(self.action_parameters, indent=2),
                          self.start_result)

        # time elapsed since the start timestamp passed in the event
        execution_time = round(float((time() - self.started_at)), 3)
        execution_time_str = str(timedelta(seconds=execution_time))

        result_data = {
            "task": self.task,
            "action": self.action,
            "id": self.action_id,
            "datetime": datetime.now().isoformat(),
            "running-time": execution_time
        }

        # make one more check for completion before testing for timeout
        check_result = action_instance.is_completed(arguments,
                                                    self.start_result)

        if check_result is not None:
            # any result other than None means the action has completed
            self._action_tracking.update_action(
                action_id=self.action_id,
                status=tracking.STATUS_COMPLETED,
                status_data={
                    tracking.TASK_TR_RESULT: str(check_result),
                    tracking.TASK_TR_EXECUTION_TIME: str(execution_time)
                })
            self._logger.info(INFO_TASK_COMPLETED, str(check_result),
                              execution_time_str)
            result_data.update({
                "result": tracking.STATUS_COMPLETED,
                "action-result": str(check_result)
            })
        # NOTE(review): timeout is read with .get() and may be None, confirm it is always set for
        # actions that have a completion method
        elif execution_time > self.timeout:
            self._action_tracking.update_action(
                action_id=self.action_id,
                status=tracking.STATUS_TIMED_OUT,
                status_data={
                    tracking.TASK_TR_EXECUTION_TIME: str(execution_time)
                })
            self._logger.error(ERR_TASK_TIMEOUT, execution_time_str)
            result_data.update({"result": tracking.STATUS_TIMED_OUT})
        else:
            self._logger.info(INFO_ACTION_NOT_COMPLETED, execution_time_str)
            result_data.update({"result": tracking.STATUS_WAIT_FOR_COMPLETION})

        # all outcomes are returned the same way
        return safe_dict(result_data)

    def handle_request(self):
        """
        Handles action execute requests, creates an instance of the required action class and
        executes the action on the resources passed in the event.

        :return: Result of the execution or completion test, None if handling the request failed
        """
        try:
            self._logger.info("Handler {}", self.__class__.__name__)

            args = {
                actions.ACTION_PARAM_CONTEXT: self._context,
                actions.ACTION_PARAM_EVENT: self._event,
                actions.ACTION_PARAM_SESSION: self.session,
                actions.ACTION_PARAM_LOGGER: self._logger,
                actions.ACTION_PARAM_RESOURCES: self.action_resources,
                actions.ACTION_PARAM_DEBUG: self.debug,
                actions.ACTION_PARAM_DRYRUN: self.dryrun,
                actions.ACTION_PARAM_ACTION_ID: self.action_id,
                actions.ACTION_PARAM_TASK: self.task,
                actions.ACTION_PARAM_STACK: self.stack_name,
                actions.ACTION_PARAM_STACK_ID: self.stack_id,
                actions.ACTION_PARAM_STACK_RESOURCES: self.stack_resources
            }
            # the task parameters are passed to the action as additional arguments
            args.update(self.action_parameters)

            action_instance = actions.create_action(self.action, args)

            requested_action = self._event[handlers.HANDLER_EVENT_ACTION]
            if requested_action == handlers.HANDLER_ACTION_EXECUTE:
                return self._handle_task_execution(action_instance, args)
            elif requested_action == handlers.HANDLER_ACTION_TEST_COMPLETION:
                return self._handle_test_task_completion(action_instance, args)

        except Exception as ex:
            # use .get() here, raising from this handler would skip setting the failed status
            self._logger.error(ERR_EXECUTION_TASK,
                               self._event.get(handlers.HANDLER_EVENT_ACTION),
                               self.task, str(ex), traceback.format_exc())
            self._action_tracking.update_action(
                action_id=self.action_id,
                status=tracking.STATUS_FAILED,
                status_data={tracking.TASK_TR_ERROR: str(ex)})
        finally:
            self._logger.flush()
def _action_tracking(self):
    """
    Returns the task tracking table, it is created on first use and reused after that.

    :return: Task tracking table instance
    """
    table = self.__action_tracking
    if table is None:
        # first use, create the table with the context and logger of this handler
        table = TaskTrackingTable(self._context, logger=self._logger)
        self.__action_tracking = table
    return table
def handle_request(self):
    """
    Handles the select resources request. Creates new task actions for the resources
    that are found and selected for a task.

    :return: Results of handling the request
    """

    def is_selected_resource(resource, taskname, tags_filter, does_resource_supports_tags):
        """
        Tests if item is a selected resource for this task

        :param resource: The tested resource
        :param taskname: Name of the task
        :param tags_filter: Tag filter
        :param does_resource_supports_tags: True if the resource supports tags
        :return: True if resource is selected
        """
        # No tags then always selected
        if not does_resource_supports_tags:
            self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
            return True

        resource_tags = resource.get("Tags", {})
        # name of the tag that holds the list of tasks for this resource
        task_list_tag = self._task_tag

        if tags_filter is None:
            # no filter, test if name of the task is in the comma separated list of tasks in the tag value
            if task_list_tag in resource_tags:
                listed_tasks = [
                    name.strip()
                    for name in resource_tags[task_list_tag].split(',')
                ]
                if taskname in listed_tasks:
                    self._logger.debug(
                        DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                        safe_json(resource, indent=2), task_list_tag,
                        taskname)
                    return True
        elif tags_filter == "*":
            # using a tag filter, * means any tag
            self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                               safe_json(resource, indent=2), taskname)
            return True
        else:
            # test if there are any tags matching the tag filter
            matches = TagFilterSet(tags_filter).pairs_matching_any_filter(
                resource_tags)
            if len(matches) != 0:
                self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                   safe_json(resource, indent=2), matches,
                                   tags_filter, taskname)
                return True

        self._logger.debug(DEBUG_RESOURCE_NOT_SELECTED,
                           safe_json(resource, indent=2), taskname)
        return False

    def resource_batches(resources):
        """
        Returns resources as chunks of batch size items. If the class has an optional custom
        aggregation function then the resources are aggregated first using this function before
        applying the batch size

        :param resources: resources to process
        :return: Generator for blocks of resource items
        """
        aggregator = getattr(self.action_class,
                             actions.CUSTOM_AGGREGATE_METHOD, None)
        chunk_size = self.action_properties.get(actions.ACTION_BATCH_SIZE)

        if aggregator is None:
            groups = [resources]
        else:
            groups = aggregator(resources, self.task_parameters)

        for group in groups:
            # without a batch size every group is returned as a whole
            if chunk_size is None:
                yield group
                continue
            offset = 0
            while offset < len(group):
                yield group[offset:offset + chunk_size]
                offset += chunk_size

    try:
        items = []
        start = datetime.now()

        self._logger.info("Handler {}", self.__class__.__name__)
        self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=2))
        self._logger.debug(DEBUG_ACTION,
                           safe_json(self.action_properties, indent=2))
        self._logger.info(INFO_SELECTED_RESOURCES, self.resource_name,
                          self.service, self.task[handlers.TASK_NAME])
        self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

        # collects the selected resources of all accounts and regions for task level aggregation
        task_level_aggregated_resources = []

        args = self._build_describe_argument()

        # tags are only retrieved and used if the resource type of the action supports them
        action_resource_type = self.action_properties.get(
            actions.ACTION_RESOURCES)
        taggable_resources = services.create_service(
            self.service).resources_with_tags
        supports_tags = action_resource_type in taggable_resources
        args["tags"] = supports_tags
        self._logger.info(INFO_USE_TAGS_TO_SELECT,
                          "R" if supports_tags else "No r")

        task_name = self.task[handlers.TASK_NAME]

        # get optional tag filter
        tag_filter = self.task.get(handlers.TASK_TAG_FILTER)
        if tag_filter is None:
            self._logger.debug(DEBUG_SELECT_BY_TASK_NAME, self.resource_name,
                               self._task_tag, task_name)
        elif tag_filter == "*":
            self._logger.debug(DEBUG_SELECT_ALL_RESOURCES, self.resource_name)
        else:
            self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES,
                               self.resource_name)

        with TaskTrackingTable(self._context) as actions_tracking:

            def add_action(role, action_resources):
                # adds a single task action for the resources and keeps track of the added item
                added = actions_tracking.add_task_action(
                    task=self.task,
                    assumed_role=role,
                    action_resources=action_resources,
                    task_datetime=self.task_dt,
                    source=self.source)
                items.append(added)
                return added

            for service in self._account_service_sessions(self.service):
                assumed_role = service.assumed_role

                self._logger.info(INFO_ACCOUNT, service.aws_account)
                if assumed_role is not None:
                    self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                for region in self._regions:

                    # args is reused, so the region of a previous iteration must be replaced or removed
                    if region is not None:
                        args["region"] = region
                    elif "region" in args:
                        del args["region"]

                    self._logger.debug(DEBUG_SELECT_PARAMETERS,
                                       self.resource_name, self.service, args)
                    # selecting a list of all resources in this account/region
                    all_resources = list(
                        service.describe(self.resource_name, **args))

                    found_message = INFO_RESOURCES_FOUND.format(
                        len(all_resources))
                    if region is not None:
                        found_message = INFO_IN_REGION.format(
                            found_message, region)
                    self._logger.info(found_message)

                    # select resources that are processed by the task
                    selected = [
                        candidate for candidate in all_resources
                        if is_selected_resource(candidate, task_name,
                                                tag_filter, supports_tags)
                    ]

                    if all_resources:
                        self._logger.info(INFO_RESOURCES_SELECTED,
                                          len(selected))
                    if not selected:
                        continue

                    # remove the tags from the resources if they are not to be kept
                    if not self.keep_tags:
                        for tagged in selected:
                            if "Tags" in tagged:
                                del tagged["Tags"]

                    if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                        # actions for these are created after all accounts and regions are processed
                        task_level_aggregated_resources += selected

                    elif self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                        if self._check_can_execute(selected):
                            # create tasks action for account aggregated resources,
                            # optionally split in batch size chunks
                            for batch in resource_batches(selected):
                                action_item = add_action(assumed_role, batch)
                                self._logger.info(
                                    INFO_ACCOUNT_AGGREGATED,
                                    action_item[tracking.TASK_TR_ID],
                                    len(batch), self.resource_name,
                                    self.task[handlers.TASK_NAME])

                    else:
                        # task action for each selected resource
                        for single in selected:
                            action_item = add_action(assumed_role, single)
                            self._logger.info(
                                INFO_RESOURCE,
                                action_item[tracking.TASK_TR_ID],
                                self.resource_name,
                                self.task[handlers.TASK_NAME])

            if (self.aggregation_level == actions.ACTION_AGGREGATION_TASK
                    and len(task_level_aggregated_resources) > 0):
                if self._check_can_execute(task_level_aggregated_resources):
                    # create tasks action for task aggregated resources,
                    # optionally split in batch size chunks
                    for batch in resource_batches(
                            task_level_aggregated_resources):
                        action_item = add_action(None, batch)
                        self._logger.info(INFO_TASK_AGGREGATED,
                                          action_item[tracking.TASK_TR_ID],
                                          len(batch), self.resource_name,
                                          self.task[handlers.TASK_NAME])

        self._logger.info(INFO_ADDED_ITEMS, len(items),
                          self.task[handlers.TASK_NAME])

        running_time = float((datetime.now() - start).total_seconds())
        self._logger.info(INFO_RESULT, running_time)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "dispatched-tasks": items
        })

    finally:
        self._logger.flush()