def update_task(name, context=None, **kwargs):
    """
    Updates the specified task. An exception is raised when the task does not exist.
    :param name: Name of the task. This name overwrites the name in kwargs if it is used there
    :param kwargs: Task parameters dictionary, see create_task for details
    :param context: Lambda context
    :return: Updated task item
    """
    with _get_logger(context=context) as logger:
        logger.info("update_task")
        config = TaskConfiguration(context=context, logger=logger)
        if name is None or len(name) == 0:
            raise ValueError(ERR_NO_TASK_NAME)
        item = config.get_config_item(name)
        if item is None:
            raise ValueError(ERR_TASK_DOES_NOT_EXIST.format(name))

        # copy to avoid side effects when modifying arguments
        args = copy.deepcopy(kwargs)
        args[configuration.CONFIG_TASK_NAME] = name
        stack_id = item.get(configuration.CONFIG_STACK_ID)
        if stack_id is not None:
            args[configuration.CONFIG_STACK_ID] = stack_id
        item = config.put_config_item(**args)
        return safe_json(item)
def lambda_handler(event, context):
    try:
        dt = datetime.utcnow()
        log_stream = LOG_STREAM.format(dt.year, dt.month, dt.day)
        result = {}
        with Logger(logstream=log_stream, buffersize=20, context=context,
                    debug=util.as_bool(os.getenv(configuration.ENV_TRACE, False))) as logger:

            logger.info("InstanceScheduler, version %version%")
            logger.debug("Event is {}", util.safe_json(event, indent=3))

            for handler_type in [SchedulerRequestHandler,
                                 SchedulerSetupHandler,
                                 ScheduleResourceHandler,
                                 AdminCliRequestHandler,
                                 CloudWatchEventHandler]:
                if handler_type.is_handling_request(event):
                    start = time()
                    handler = handler_type(event, context)
                    logger.info("Handler is {}".format(handler_type.__name__))
                    try:
                        result = handler.handle_request()
                    except Exception as e:
                        logger.error("Error handling request {} by handler {}: ({})\n{}",
                                     json.dumps(event), handler_type.__name__, e, traceback.format_exc())
                    execution_time = round(float(time() - start), 3)
                    logger.info("Handling took {} seconds", execution_time)
                    return result
            logger.debug("Request was not handled, no handler was able to handle this type of request {}",
                         json.dumps(event))
    finally:
        configuration.unload_scheduler_configuration()
def _create_period(self, period):
    self.number_of_periods += 1
    period_name = PERIOD_NAME.format(self._schedule_resource_name, self.number_of_periods)
    self._logger.info(INF_PERIOD_NAME, period_name)

    for p in period:
        if p not in VALID_PERIOD_PROPERTIES:
            raise ValueError(ERR_INVALID_PERIOD_PROPERTY.format(p, ", ".join(VALID_PERIOD_PROPERTIES)))

    create_period_args = {configuration.NAME: period_name}

    self._set_if_specified(period, PROP_BEGIN_TIME, create_period_args, configuration.BEGINTIME)
    self._set_if_specified(period, PROP_END_TIME, create_period_args, configuration.ENDTIME)
    self._set_if_specified(period, PROP_MONTH_DAYS, create_period_args, configuration.MONTHDAYS)
    self._set_if_specified(period, PROP_MONTHS, create_period_args, configuration.MONTHS)
    self._set_if_specified(period, PROP_WEEKDAYS, create_period_args, configuration.WEEKDAYS)

    create_period_args[configuration.DESCRIPTION] = PERIOD_DESCRIPTION.format(
        self._schedule_resource_name, self.number_of_periods)
    description_config = period.get(PROP_DESCRIPTION, None)
    if description_config is not None:
        create_period_args[configuration.DESCRIPTION] = "{}, {}".format(
            description_config, create_period_args[configuration.DESCRIPTION])

    period = self._admin.create_period(**create_period_args)
    self._logger.info(INF_PERIOD_CREATED, safe_json(period, 3))
    return period_name, period.get(PROP_INSTANCE_TYPE, None)
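For illustration, the period consumed by _create_period above is a plain dictionary of the PROP_* properties it validates. A minimal sketch, assuming the property names resolve to the spellings commonly used by the solution's CloudFormation custom resource; the actual PROP_* constant values are defined elsewhere and not shown in these snippets.

# Hypothetical period definition; key spellings are assumptions, since
# the PROP_* constant values are not visible in these snippets.
office_hours_period = {
    "Description": "Weekday office hours",
    "BeginTime": "09:00",
    "EndTime": "17:00",
    "WeekDays": "Mon-Fri"
}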
def handle_request(self):
    """
    Handles the custom resource request from cloudformation
    :return: Response data for the custom resource request
    """
    start = datetime.now()
    self._logger.info("Handler {}", self.__class__.__name__)
    self._logger.info("Cloudformation request is {}", safe_json(self._event, indent=2))

    try:
        result = CustomResource.handle_request(self)
        return safe_dict({
            "result": result,
            "datetime": datetime.now().isoformat(),
            "running-time": (datetime.now() - start).total_seconds()
        })
    except Exception as ex:
        self._logger.error("{} {}", ex, traceback.format_exc())
        raise ex
    finally:
        self._logger.flush()
def execute(self, _):
    self.logger.info("{}, version {}", self.properties[ACTION_TITLE], self.properties[ACTION_VERSION])
    self.logger.info(INFO_START_SNAPSHOT_ACTION, self.instance_id, self.task)
    self.logger.debug("Instance block device mappings are {}", self.instance["BlockDeviceMappings"])

    if self.backup_root_device:
        self.create_volume_snapshot(self.root_volume)

    if self.backup_data_devices:
        for volume in self.volumes:
            if volume != self.root_volume:
                self.create_volume_snapshot(volume)

    self.result[METRICS_DATA] = build_action_metrics(
        action=self,
        CreatedSnapshots=len(self.result.get("volumes", {}).values()),
        SnapshotsSizeTotal=sum([volume.get("create_snapshot", {}).get("VolumeSize")
                                for volume in self.result.get("volumes", {}).values()]))

    return safe_json(self.result)
def get_actions(context=None):
    """
    Returns a dictionary with all available actions, see get_action for details on the returned items
    :param context: Lambda context
    :return: All available actions
    """
    with _get_logger(context=context) as logger:
        logger.info("get_actions")
        return safe_json({action_name: get_action(action_name, log_this_call=False)
                          for action_name in actions.all_actions()})
def add_task_action(self, task, assumed_role, action_resources, task_datetime, source):
    """
    Creates and adds a new action to be written to the tracking table. Note that the items are kept in an
    internal buffer and written in batches to the dynamodb table when the instance goes out of scope or the
    close method is called explicitly.
    :param task: Task that executes the action
    :param assumed_role: Role to assume to execute the action
    :param action_resources: Resources on which the action is performed
    :param task_datetime: Time the task was scheduled for
    :param source: Source of the event that started the task
    :return: Created item
    """
    item = {
        TASK_TR_ID: str(uuid.uuid4()),
        TASK_TR_NAME: task[handlers.TASK_NAME],
        TASK_TR_ACTION: task[handlers.TASK_ACTION],
        TASK_TR_CREATED: datetime.now().isoformat(),
        TASK_TR_CREATED_TS: int(time()),
        TASK_TR_SOURCE: source,
        TASK_TR_DT: task_datetime,
        TASK_TR_RESOURCES: safe_json(action_resources),
        TASK_TR_STATUS: STATUS_PENDING,
        TASK_TR_DEBUG: task[handlers.TASK_DEBUG],
        TASK_TR_DRYRUN: task[handlers.TASK_DRYRUN],
        TASK_TR_INTERNAL: task[handlers.TASK_INTERNAL],
        TASK_TR_TIMEOUT: task[handlers.TASK_TIMOUT]
    }

    if assumed_role is not None:
        item[TASK_TR_ASSUMED_ROLE] = assumed_role
        item[TASK_TR_ACCOUNT] = AwsService.account_from_role_arn(assumed_role)
    else:
        item[TASK_TR_ACCOUNT] = AwsService.get_aws_account()

    # only store parameters if there are any, serialized as json
    if len(task[handlers.TASK_PARAMETERS]) > 0:
        item[TASK_TR_PARAMETERS] = task[handlers.TASK_PARAMETERS]
        if item[TASK_TR_PARAMETERS]:
            item[TASK_TR_PARAMETERS] = safe_json(item[TASK_TR_PARAMETERS])

    self._new_action_items.append(item)
    return item
def get_tasks(include_internal=False, context=None):
    """
    Returns all available tasks
    :param include_internal: True if internal tasks must be included
    :param context: Lambda context
    :return: All available tasks
    """
    with _get_logger(context=context) as logger:
        logger.info("get_tasks()")
        tasks = [t for t in TaskConfiguration(context=context, logger=logger).config_items(include_internal)]
        return safe_json(tasks)
def _create_schedule(self):
    self._logger.info(INF_SCHEDULE_NAME, self._schedule_resource_name)

    create_schedule_args = {configuration.NAME: self._schedule_resource_name}

    ps = self.resource_properties
    for pr in ps:
        # compatibility fix for a property-name typo in an older release
        if pr == "UseMaintenaceWindow":
            pr = PROP_USE_MAINTENANCE_WINDOW
        if pr not in VALID_SCHEDULE_PROPERTIES:
            raise ValueError(ERR_INVALID_SCHEDULE_PROPERTY.format(pr, ", ".join(VALID_SCHEDULE_PROPERTIES)))

    self._set_if_specified(ps, PROP_METRICS, create_schedule_args, dest_name=configuration.METRICS)
    self._set_if_specified(ps, PROP_OVERWRITE, create_schedule_args, dest_name=configuration.OVERWRITE)
    self._set_if_specified(ps, PROP_OVERRIDE_STATUS, create_schedule_args, dest_name=configuration.OVERRIDE_STATUS)
    self._set_if_specified(ps, PROP_USE_MAINTENANCE_WINDOW, create_schedule_args,
                           dest_name=configuration.USE_MAINTENANCE_WINDOW)
    self._set_if_specified(ps, PROP_ENFORCED, create_schedule_args, dest_name=configuration.ENFORCED,
                           default=False)
    self._set_if_specified(ps, PROP_HIBERNATE, create_schedule_args, dest_name=configuration.HIBERNATE,
                           default=False)
    self._set_if_specified(ps, PROP_RETAIN_RUNNING, create_schedule_args,
                           dest_name=configuration.RETAINED_RUNNING, default=False)
    self._set_if_specified(ps, PROP_STOP_NEW, create_schedule_args,
                           dest_name=configuration.STOP_NEW_INSTANCES, default=True)
    self._set_if_specified(ps, PROP_TIMEZONE, create_schedule_args, dest_name=configuration.TIMEZONE,
                           default="UTC")
    self._set_if_specified(ps, PROP_DESCRIPTION, create_schedule_args, dest_name=configuration.DESCRIPTION)
    self._set_if_specified(ps, PROP_SSM_MAINTENANCE_WINDOW, create_schedule_args,
                           dest_name=configuration.SSM_MAINTENANCE_WINDOW)

    create_schedule_args[configuration.SCHEDULE_CONFIG_STACK] = self.stack_id

    periods = []
    try:
        self.number_of_periods = 0
        for period in ps.get(PROP_PERIODS, []):
            period_name, instance_type = self._create_period(period)
            if instance_type is not None:
                period_name = "{}{}{}".format(period_name, configuration.INSTANCE_TYPE_SEP, instance_type)
            periods.append(period_name)

        create_schedule_args[configuration.PERIODS] = periods
        schedule = self._admin.create_schedule(**create_schedule_args)

        self.physical_resource_id = self._schedule_resource_name
        self._logger.info(INF_SCHEDULE_CREATED, safe_json(schedule, 3))
    except Exception as ex:
        self._delete_periods()
        raise ex
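A sketch of the resource properties _create_schedule reads, again assuming the PROP_* constants resolve to the spellings validated against VALID_SCHEDULE_PROPERTIES above; the Periods list holds dictionaries in the shape consumed by _create_period (see the period sketch after that function).

# Hypothetical custom resource properties for a schedule; key names
# are assumptions, since the PROP_* constant values are not shown here.
schedule_properties = {
    "Description": "Stop instances outside office hours",
    "Timezone": "UTC",
    "Enforced": False,
    "Periods": [office_hours_period]  # period sketch shown earlier
}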
def handle_request(self):
    """
    Handles the cloudwatch rule timer event
    :return: Information on the tasks for which a completion check was scheduled
    """
    try:
        start = datetime.now()
        self._logger.info("Handler {}", self.__class__.__name__)

        count = 0
        tracking_table = TaskTrackingTable(context=self._context)
        for task in tracking_table.get_tasks_to_check_for_completion():
            count += 1
            task_id = task[tracking.TASK_TR_ID]
            last_check_for_completion_time = datetime.now().isoformat()
            tracking_table.update_action(
                task_id,
                status_data={tracking.TASK_TR_LAST_WAIT_COMPLETION: last_check_for_completion_time})
            self._logger.info(INF_SET_COMPLETION_TASK_TIMER,
                              task[tracking.TASK_TR_NAME], task_id, last_check_for_completion_time)

        running_time = float((datetime.now() - start).total_seconds())
        self._logger.info(INF_COMPETION_ITEMS_SET, running_time, count)

        if count == 0:
            rule = handlers.disable_completion_cloudwatch_rule(self._context)
            self._logger.info(INF_DISABLED_COMPLETION_TIMER, rule)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "tasks-to-check": count
        })
    except ValueError as ex:
        self._logger.error("{}\n{}".format(ex, safe_json(self._event, indent=2)))
    finally:
        self._logger.flush()
def get_task(name, context=None):
    """
    Returns the item for the specified task
    :param name: Name of the task
    :param context: Lambda context
    :return: Task item, raises an exception if a task with the specified name does not exist
    """
    with _get_logger(context=context) as logger:
        logger.info("get_task")
        item = TaskConfiguration(context=context, logger=logger).get_config_item(name)
        if item is None:
            raise ValueError("not found: task with name {} does not exist".format(name))
        return safe_json(item)
def lambda_handler(event, context):
    dt = datetime.utcnow()
    logstream = LOG_STREAM.format("OpsAutomatorMain", dt.year, dt.month, dt.day)

    with Logger(logstream=logstream, context=context, buffersize=20) as logger:
        logger.info("Ops Automator, version %version%")

        for handler_name in handlers.all_handlers():
            if handlers.get_class_for_handler(handler_name).is_handling_request(event):
                handler = handlers.create_handler(handler_name, event, context)
                logger.info("Handler is {}", handler_name)
                try:
                    result = handler.handle_request()
                    logger.info(MSG_REQUEST_HANLED, handler_name, (datetime.utcnow() - dt).total_seconds())
                    return safe_dict(result)
                except Exception as e:
                    logger.error(MSG_ERR_HANDLING_REQUEST, safe_json(event, indent=2),
                                 handler_name, e, traceback.format_exc())
                    return

        logger.error(MSG_NO_REQUEST_HANDLER, safe_json(event, indent=2))
def send_metrics_data(metrics, logger):
    """
    Sends metrics data
    :param metrics: Metrics data
    :param logger: Logger
    :return:
    """
    url = os.getenv(util.ENV_METRICS_URL, None)
    if url is None:
        logger.warning(WARN_ENV_METRICS_URL_NOT_SET, util.ENV_METRICS_URL)
        return

    solution_id = os.getenv(util.ENV_SOLUTION_ID, None)
    if solution_id is None:
        logger.warning(WARN_SOLUTION_ID_NOT_SET)
        return

    data_dict = {
        "TimeStamp": str(datetime.utcnow().isoformat()),
        "UUID": str(uuid.uuid4()),
        "Data": metrics,
        "Solution": solution_id,
        "Version": VERSION
    }

    data_json = safe_json(data_dict, indent=3)
    logger.info(INF_METRICS_DATA, data_json)

    headers = {
        "content-type": "application/json",
        "content-length": str(len(data_json))
    }

    try:
        response = requests.post(url, data=data_json, headers=headers)
        response.raise_for_status()
        logger.debug(INF_METRICS_DATA_SENT, response.status_code, response.text)
    except Exception as exc:
        logger.warning(WARN_SENDING_METRICS_FAILED.format(str(exc)))
def is_completed(self, _, start_result):
    """
    Tests if the table update action has been completed. The method describes the table and checks its status;
    as long as the table is not in the "ACTIVE" state the method must return None.
    :param _: not used
    :param start_result: output of initial execution
    :return: Description of the table if it is active, None if not completed yet
    """
    start_data = json.loads(start_result)
    if "current" in start_data:
        return start_data["current"]

    resp = self.client.describe_table_with_retries(TableName=self.tablename)
    if resp.get("Table", {}).get("TableStatus", "") == "ACTIVE":
        return safe_json(resp["Table"])
    else:
        return None
def delete_task(name, exception_if_not_exists=False, context=None):
    """
    Deletes the specified task
    :param name: Name of the task to be deleted
    :param exception_if_not_exists: If set to True an exception is raised if the task does not exist
    :param context: Lambda context
    :return: Deleted task item
    """
    with _get_logger(context=context) as logger:
        logger.info("delete_task")
        config = TaskConfiguration(context=context, logger=logger)
        if exception_if_not_exists:
            item = config.get_config_item(name)
            if item is None:
                raise ValueError(ERR_TASK_DOES_NOT_EXIST.format(name))
        else:
            item = {"Name": name}
        config.delete_config_item(name)
        return safe_json(item)
def is_completed(self, _, start_results):
    """
    Tests if the copy snapshot action has been completed. The method uses the id of the copied snapshot and
    tests if it exists and is completed in the destination region. As long as this is not the case the method
    must return None.
    :param _: not used
    :param start_results: Result of the api call that started the copy, contains the id of the snapshot in the
    destination region
    :return: Result of the copy action, None if not completed yet
    """
    # start result data is passed in as text, for this action it is json formatted
    snapshot_create_data = json.loads(start_results)

    # create service instance to test if the snapshot exists
    ec2 = services.create_service(
        "ec2",
        session=self.session,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self.context))

    copied_snapshot_id = snapshot_create_data["copied-snapshot-id"]

    # test if the snapshot with the id that was returned from the CopySnapshot API call exists and is completed
    copied_snapshot = ec2.get("Snapshots",
                              region=self.destination_region,
                              select="Snapshots[?State=='completed']",
                              OwnerIds=["self"],
                              Filters=[{"Name": "snapshot-id", "Values": [copied_snapshot_id]}])

    if copied_snapshot is not None:
        # action completed
        self.logger.info(INFO_CHECK_COMPLETED_RESULT, copied_snapshot)
        self.logger.info(INFO_COPY_COMPLETED, self.source_snapshot_id, self.source_region,
                         copied_snapshot_id, self.destination_region)
        return safe_json(copied_snapshot)

    # not done yet
    self.logger.info(INFO_COPY_PENDING, copied_snapshot_id, self.destination_region)
    return None
def handle_request(self):
    start = datetime.now()
    self._logger.info("Handler {}", self.__class__.__name__)
    self._logger.info("Cloudformation request is {}", safe_json(self._event, indent=2))

    try:
        result = CustomResource.handle_request(self)
        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": (datetime.now() - start).total_seconds(),
            "result": result
        })
    finally:
        self._logger.flush()
def execute(self, _):
    self.logger.info("{}, version {}", str(self.__class__).split(".")[-1], self.properties[ACTION_VERSION])
    self.logger.debug("Implementation {}", __name__)

    get_resp = self.client.describe_table_with_retries(TableName=self.tablename)
    update_args = self._get_throughput_update(get_resp)
    self.result["current"] = get_resp

    if len(update_args) > 0:
        update_args["TableName"] = self.tablename
        self.logger.info("Updating throughput for table and indexes with arguments {}",
                         json.dumps(update_args, indent=2))
        update_resp = self.client.update_table_with_retries(**update_args)
        self.result["update"] = update_resp
    else:
        self.logger.info("Throughput for table {} and indexes already at requested capacity", self.tablename)

    return safe_json(self.result)
def _execute_task(self, task, dt=None):
    """
    Executes a task by starting a lambda function that selects the resources for that task's action
    :param task: Task to start
    :param dt: Task start datetime
    :return:
    """
    event = {
        handlers.HANDLER_EVENT_ACTION: handlers.HANDLER_ACTION_SELECT_RESOURCES,
        handlers.HANDLER_EVENT_TASK: task,
        handlers.HANDLER_EVENT_SOURCE: "aws:events",
        handlers.HANDLER_EVENT_TASK_DT: dt.isoformat() if dt is not None else datetime.utcnow().isoformat()
    }

    if self._context is not None:
        # start lambda function to scan for task resources
        payload = str.encode(safe_json(event))
        client = get_client_with_retries("lambda", ["invoke"], context=self._context)
        resp = client.invoke_with_retries(FunctionName=self._context.function_name,
                                          Qualifier=self._context.function_version,
                                          InvocationType="Event",
                                          LogType="None",
                                          Payload=payload)
        self._logger.info(INFO_LAMBDA, resp["StatusCode"], payload)
    else:
        # if not running in a lambda environment, pass the event to the main task handler directly
        lambda_handler(event, None)
def handle_request(self):
    """
    Handles the event
    :return: Result of handling the event, sent back to the REST admin api
    """

    def snake_to_pascal_case(s):
        converted = ""
        s = s.strip("_").capitalize()
        i = 0
        while i < len(s):
            if s[i] == "_":
                i += 1
                converted += s[i].upper()
            else:
                converted += s[i]
            i += 1
        return converted

    # noinspection PyShadowingNames
    def dict_to_pascal_case(d):
        d_result = {}
        if isinstance(d, dict):
            for i in d:
                key = snake_to_pascal_case(i)
                d_result[key] = dict_to_pascal_case(d[i])
            return d_result
        elif isinstance(d, list):
            return [dict_to_pascal_case(l) for l in d]
        return d

    try:
        self._logger.info("Handler {} : Received CLI request {}", self.__class__.__name__,
                          json.dumps(self._event))

        # get access to admin api
        admin = ConfigAdmin(logger=self._logger, context=self._context)

        # get api action and map it to a function in the admin API
        fn_name = self.commands.get(self.action, None)
        if fn_name is None:
            raise ValueError("Command {} does not exist".format(self.action))
        fn = getattr(admin, fn_name)

        # call the mapped admin api method
        self._logger.info("Calling \"{}\" with parameters {}", fn.__name__, self.parameters)
        api_result = fn(**self.parameters)

        # convert to awscli PascalCase output format
        result = dict_to_pascal_case(api_result)

        # perform output transformation
        if fn_name in self.transformations:
            result = jmespath.search(self.transformations[fn_name], result)

        # log formatted result
        json_result = safe_json(result, 3)
        self._logger.info("Call result is {}", json_result)

        return result
    except Exception as ex:
        self._logger.info("Call failed, error is {}", str(ex))
        return {"Error": str(ex)}
    finally:
        self._logger.flush()
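To see what the CLI output conversion does, here is a standalone copy of the snake_to_pascal_case helper with a couple of checks; it is only an illustration and runs without the handler class.

# Standalone check of the snake_case -> PascalCase mapping performed by
# the handler above (verbatim copy of the nested helper, for illustration).
def snake_to_pascal_case(s):
    converted = ""
    s = s.strip("_").capitalize()
    i = 0
    while i < len(s):
        if s[i] == "_":
            i += 1
            converted += s[i].upper()
        else:
            converted += s[i]
        i += 1
    return converted

assert snake_to_pascal_case("task_name") == "TaskName"
assert snake_to_pascal_case("interval") == "Interval"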
def test_safeJson(self):
    d = dict(foo='foo"bar')
    s = util.safe_json(d)
    # the embedded quote is escaped in the JSON output, so the expected
    # Python literal needs an escaped backslash
    self.assertEqual(s, '{"foo": "foo\\"bar"}')
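The escaping the corrected assertion expects matches what json.dumps produces, which safe_json presumably wraps; a standalone check of the same expectation (this assumes safe_json does not change the default escaping):

import json

# The JSON text contains foo\"bar; in a Python literal the backslash
# itself must be escaped, hence the double backslash below.
assert json.dumps({"foo": 'foo"bar'}) == '{"foo": "foo\\"bar"}'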
def send_metrics_data(metrics, logger):
    """
    Sends metrics data
    :param metrics: Metrics data
    :param logger: Logger
    :return:
    """
    try:
        url = os.getenv(util.ENV_METRICS_URL, None)
        if url is None:
            logger.warning(WARN_ENV_METRICS_URL_NOT_SET, util.ENV_METRICS_URL)
            return

        solution_id = os.getenv(util.ENV_SOLUTION_ID, None)
        if solution_id is None:
            logger.warning(WARN_SOLUTION_ID_NOT_SET)
            return

        # the last 36 characters of the stack arn hold the stack uuid
        stack_id = os.getenv(util.STACK_ID, "")[-36:]
        uuid_key = os.getenv(util.UUID_KEY) + str(stack_id)
        user_agent_extra_string = os.getenv(util.USER_AGENT_EXTRA)

        uuid_parameter = None
        try:
            if user_agent_extra_string is not None:
                solution_config = {"user_agent_extra": user_agent_extra_string}
                config = botocore.config.Config(**solution_config)
            else:
                config = None
            client = boto3.client("ssm", config=config)
            response = client.get_parameter(Name=uuid_key)
            uuid_parameter = response.get("Parameter", {}).get("Value")
        except botocore.exceptions.ClientError as ex:
            if ex.response.get("Error", {}).get("Code") == "ParameterNotFound":
                uuid_parameter = str(uuid.uuid4())
                try:
                    logger.info("creating a new parameter")
                    client.put_parameter(
                        Name=uuid_key,
                        Description="This is a unique id for each aws-instance-scheduler solution stack, "
                                    "for reporting metrics.",
                        Value=uuid_parameter,
                        Type="String")
                except Exception as ex:
                    logger.info("Failed to create a new parameter")
                    logger.info(ex)
            else:
                logger.warning("Error reading UUID parameter.")
                raise ex

        logger.info("uuid_parameter {} ".format(uuid_parameter))

        data_dict = {
            "TimeStamp": str(datetime.utcnow().isoformat()),
            "UUID": uuid_parameter,
            "Data": metrics,
            "Solution": solution_id,
            "Version": VERSION
        }

        data_json = safe_json(data_dict, indent=3)
        logger.info(INF_METRICS_DATA, data_json)

        headers = {
            "content-type": "application/json",
            "content-length": str(len(data_json))
        }

        response = requests.post(url, data=data_json, headers=headers)
        response.raise_for_status()
        logger.debug(INF_METRICS_DATA_SENT, response.status_code, response.text)
    except Exception as exc:
        logger.warning(WARN_SENDING_METRICS_FAILED.format(str(exc)))
def create_task(context=None, **kwargs):
    """
    Creates a new task

    :param kwargs: Task parameters
    :param context: Lambda context

    Constants can be found in configuration/__init__.py

    -CONFIG_ACTION_NAME: Name of the action executed by the task, an exception is raised if not specified or if
    the action does not exist (mandatory, string)
    -CONFIG_DEBUG: Set to True to log additional debug information for this task (optional, default False,
    boolean)
    -CONFIG_DESCRIPTION: Task description (optional, default None, string)
    -CONFIG_CROSS_ACCOUNT_ROLES: List of cross account roles for cross account processing. Note that if the
    account of a role has already been covered by another role, or if it is the account processed by the
    scheduler itself, a warning is generated when executing the task and the role is skipped (optional,
    default [], List<string>)
    -CONFIG_ENABLED: Set to True to enable execution of the task, False to suspend executions (optional, default
    True, boolean)
    -CONFIG_INTERNAL: Flag to indicate the task is used for internal tasks of the scheduler (optional, default
    False, boolean)
    -CONFIG_INTERVAL: Cron expression to schedule time/date based execution of the task (optional, default "",
    string)
    -CONFIG_TASK_TIMEOUT: Timeout in minutes for the task to complete (optional, default is the action's value or
    the global timeout, number)
    -CONFIG_TASK_NAME: Name of the task, an exception is raised if not specified or if the name already exists
    (mandatory, string)
    -CONFIG_PARAMETERS: Dictionary with names and values passed to the executed action of this task (optional,
    default {}, dictionary)
    -CONFIG_THIS_ACCOUNT: Set to True to run the task for resources in the account of the scheduler (optional,
    default True, boolean)
    -CONFIG_TIMEZONE: Timezone for time/date based tasks for this task (optional, default UTC, string)
    -CONFIG_TAG_FILTER: Tag filter used to select resources for the task instead of the name of the task in the
    list of values of the automation tag. Only allowed if the selected resources support tags (optional,
    default "", string)
    -CONFIG_REGIONS: Regions in which to run the task. Use "*" for all regions in which the service for this
    task's action is available. If no regions are specified the region in which the scheduler is installed is
    used as the default. Specifying one or more regions for services that are not region specific will generate
    a warning when processing the task (optional, default current region, List<string>)
    -CONFIG_STACK_ID: Id of the stack if the task is created as part of a cloudformation template (optional,
    default None, string)
    -CONFIG_DRYRUN: Dryrun parameter passed to the executed action (optional, default False, boolean)
    -CONFIG_EVENTS: List of resource events that trigger the task to be executed (optional, default [],
    List<string>)

    :return: Item created in the task configuration
    """
    with _get_logger(context=context) as logger:
        logger.info("create_task")
        config = TaskConfiguration(context=context, logger=logger)
        name = kwargs.get(configuration.CONFIG_TASK_NAME)
        if name is None or len(name) == 0:
            raise ValueError(ERR_NO_TASK_NAME)
        item = config.get_config_item(name)
        if item is not None:
            raise ValueError(ERR_TASK_DOES_ALREADY_EXIST.format(name))
        new_item = config.put_config_item(**kwargs)
        return safe_json(new_item)
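A hypothetical call, assuming the CONFIG_* constants resolve to PascalCase parameter names as used by the admin CLI output elsewhere in these snippets; the actual constant values live in configuration/__init__.py and are not shown here.

# Hypothetical example; parameter names and values are illustrative only.
task = create_task(
    Name="daily-snapshot",
    Action="Ec2CreateSnapshot",
    Interval="0 1 * * ?",
    Timezone="UTC",
    Enabled=True)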
def execute(self, _):

    def snapshots_to_delete():

        def by_retention_days():
            delete_before_dt = datetime.utcnow().replace(tzinfo=pytz.timezone("UTC")) - timedelta(
                days=int(self.retention_days))
            self.logger.info(INFO_RETENTION_DAYS, delete_before_dt)

            for sn in sorted(self.snapshots, key=lambda s: s["Region"]):
                snapshot_dt = dateutil.parser.parse(sn["StartTime"])
                if snapshot_dt < delete_before_dt:
                    self.logger.info(INFO_SN_RETENTION_DAYS, sn["SnapshotId"], sn["StartTime"],
                                     self.retention_days)
                    yield sn

        def by_retention_count():
            self.logger.info(INFO_KEEP_RETENTION_COUNT, self.retention_count)
            sorted_snapshots = sorted(self.snapshots,
                                      key=lambda s: (s["VolumeId"], dateutil.parser.parse(s["StartTime"])),
                                      reverse=True)
            volume = None
            count_for_volume = 0
            for sn in sorted_snapshots:
                if sn["VolumeId"] != volume:
                    volume = sn["VolumeId"]
                    count_for_volume = 0
                count_for_volume += 1
                if count_for_volume > self.retention_count:
                    self.logger.info(INFO_SN_DELETE_RETENTION_COUNT, sn["SnapshotId"], count_for_volume)
                    yield sn

        return by_retention_days() if self.retention_days else by_retention_count()

    self.logger.info("{}, version {}", self.properties[ACTION_TITLE], self.properties[ACTION_VERSION])

    region = None
    ec2 = None
    deleted_count = 0

    self.logger.info(INFO_ACCOUNT_SNAPSHOTS, len(self.snapshots), self.account)
    self.logger.debug("Snapshots : {}", self.snapshots)

    snapshot_id = ""
    for snapshot in snapshots_to_delete():
        if snapshot["Region"] != region:
            region = snapshot["Region"]
            self.logger.info(INFO_REGION, region)
            ec2 = get_client_with_retries("ec2", ["delete_snapshot"],
                                          region=region,
                                          context=self.context,
                                          session=self.session)
            if "deleted" not in self.result:
                self.result["deleted"] = {}
            self.result["deleted"][region] = []

        try:
            snapshot_id = snapshot["SnapshotId"]
            ec2.delete_snapshot_with_retries(DryRun=self.dryrun, SnapshotId=snapshot_id)
            deleted_count += 1
            self.logger.info(INFO_SNAPSHOT_DELETED, snapshot_id, snapshot["VolumeId"])
            self.result["deleted"][region].append(snapshot_id)
        except ClientError as ex_client:
            if ex_client.response.get("Error", {}).get("Code", "") == "InvalidSnapshot.NotFound":
                self.logger.info("Snapshot \"{}\" was not found and could not be deleted", snapshot_id)
            else:
                raise ex_client
        except Exception as ex:
            if self.dryrun:
                self.logger.debug(str(ex))
                self.result["delete_snapshot"] = str(ex)
                return self.result
            else:
                raise ex

    self.result.update({
        "snapshots": len(self.snapshots),
        "snapshots-deleted": deleted_count,
        METRICS_DATA: build_action_metrics(self, DeletedSnapshots=deleted_count)
    })

    return safe_json(self.result)
def is_completed(self, _, start_results):
    """
    Tests if the create snapshot actions have been completed. The method uses the ids of the created snapshots
    and tests if the status of all snapshots is "available". As long as this is not the case the method must
    return None.
    :param _: not used
    :param start_results: Result of the execute method that started the creation of the snapshots
    :return: Result of the test if all snapshots are available, None if at least one snapshot is in pending state
    """
    # start result data is passed in as text, for this action it is json formatted
    snapshot_create_data = json.loads(start_results)
    self.logger.debug("Start result data is {}", start_results)

    snapshot_ids = [volume.get("create_snapshot", {}).get("SnapshotId")
                    for volume in snapshot_create_data.get("volumes", {}).values()]
    self.logger.info("Checking status of snapshot(s) {}", ",".join(snapshot_ids))

    # create service instance to test if the snapshots are available
    ec2 = services.create_service(
        "ec2",
        session=self.session,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self.context))

    # test if the snapshots with the ids that were returned from the CreateSnapshot API call exist and are
    # completed
    snapshots = ec2.describe("Snapshots",
                             OwnerIds=["self"],
                             Filters=[{"Name": "snapshot-id", "Values": snapshot_ids}])

    test_result = {
        "InstanceId": snapshot_create_data["instance"],
        "Volumes": [{
            "VolumeId": s["VolumeId"],
            "SnapshotId": s["SnapshotId"],
            "State": s["State"],
            "Progress": s["Progress"]
        } for s in snapshots]
    }

    self.logger.info(INFO_STATE_SNAPSHOTS, json.dumps(test_result))

    # wait until no snapshot is pending anymore
    for volume in test_result["Volumes"]:
        if volume["State"] == SNAPSHOT_STATE_PENDING:
            self.logger.info(INFO_CREATION_PENDING)
            return None

    # collect possible failed snapshots
    failed = [volume for volume in test_result["Volumes"] if volume["State"] == SNAPHOT_STATE_ERROR]
    if len(failed) > 0:
        s = ",".join([ERR_FAILED_SNAPSHOT.format(volume["SnapshotId"], volume["VolumeId"])
                      for volume in failed])
        raise Exception(s)

    self.logger.info(INFO_COMPLETED)
    return safe_json(test_result)
def handle_request(self):
    """
    Handles the select resources request. Creates new actions for resources found for a task
    :return: Results of handling the request
    """

    def is_selected_resource(resource, taskname, tags_filter, does_resource_supports_tags):
        """
        Tests if an item is a selected resource for this task
        :param resource: The tested resource
        :param taskname: Name of the task
        :param tags_filter: Tag filter
        :param does_resource_supports_tags: True if the resource supports tags
        :return: True if the resource is selected
        """
        # resources that do not support tags are always selected
        if not does_resource_supports_tags:
            self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
            return True

        tags = resource.get("Tags", {})

        # name of the tag that holds the list of tasks for this resource
        tagname = self._task_tag

        if tags_filter is None:
            # test if the name of the task is in the list of tasks in the tag value
            if tagname in tags and taskname in [x.strip() for x in tags[tagname].split(',')]:
                self._logger.debug(DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                                   safe_json(resource, indent=2), tagname, taskname)
                return True
        else:
            # using a tag filter, * means any tag
            if tags_filter == "*":
                self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                                   safe_json(resource, indent=2), taskname)
                return True

            # test if there are any tags matching the tag filter
            matched_tags = TagFilterSet(tags_filter).pairs_matching_any_filter(tags)
            if len(matched_tags) != 0:
                self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                   safe_json(resource, indent=2), matched_tags, tags_filter, taskname)
                return True

        self._logger.debug(DEBUG_RESOURCE_NOT_SELECTED, safe_json(resource, indent=2), taskname)
        return False

    def resource_batches(resources):
        """
        Returns resources as chunks of batch size items. If the class has an optional custom aggregation
        function then the resources are aggregated first using this function before applying the batch size
        :param resources: resources to process
        :return: Generator for blocks of resource items
        """
        aggregate_func = getattr(self.action_class, actions.CUSTOM_AGGREGATE_METHOD, None)
        batch_size = self.action_properties.get(actions.ACTION_BATCH_SIZE)

        for i in aggregate_func(resources, self.task_parameters) if aggregate_func is not None else [resources]:
            if batch_size is None:
                yield i
            else:
                first = 0
                while first < len(i):
                    yield i[first:first + batch_size]
                    first += batch_size

    try:
        items = []
        start = datetime.now()

        self._logger.info("Handler {}", self.__class__.__name__)
        self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=2))
        self._logger.debug(DEBUG_ACTION, safe_json(self.action_properties, indent=2))
        self._logger.info(INFO_SELECTED_RESOURCES, self.resource_name, self.service,
                          self.task[handlers.TASK_NAME])
        self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

        task_level_aggregated_resources = []
        args = self._build_describe_argument()

        supports_tags = self.action_properties.get(actions.ACTION_RESOURCES) in services.create_service(
            self.service).resources_with_tags
        args["tags"] = supports_tags
        # the prefix completes the INFO_USE_TAGS_TO_SELECT message text
        self._logger.info(INFO_USE_TAGS_TO_SELECT, "R" if supports_tags else "No r")

        task_name = self.task[handlers.TASK_NAME]

        # get optional tag filter
        tag_filter = self.task.get(handlers.TASK_TAG_FILTER)
        if tag_filter is None:
            self._logger.debug(DEBUG_SELECT_BY_TASK_NAME, self.resource_name, self._task_tag, task_name)
        elif tag_filter == "*":
            self._logger.debug(DEBUG_SELECT_ALL_RESOURCES, self.resource_name)
        else:
            self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES, self.resource_name)

        with TaskTrackingTable(self._context) as actions_tracking:
            for service in self._account_service_sessions(self.service):
                assumed_role = service.assumed_role

                self._logger.info(INFO_ACCOUNT, service.aws_account)
                if assumed_role is not None:
                    self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                for region in self._regions:
                    if region is not None:
                        args["region"] = region
                    else:
                        if "region" in args:
                            del args["region"]

                    self._logger.debug(DEBUG_SELECT_PARAMETERS, self.resource_name, self.service, args)

                    # select a list of all resources in this account/region
                    all_resources = list(service.describe(self.resource_name, **args))

                    logstr = INFO_RESOURCES_FOUND.format(len(all_resources))
                    if region is not None:
                        logstr = INFO_IN_REGION.format(logstr, region)
                    self._logger.info(logstr)

                    # select the resources that are processed by the task
                    selected = list([sr for sr in all_resources
                                     if is_selected_resource(sr, task_name, tag_filter, supports_tags)])

                    if len(all_resources) > 0:
                        self._logger.info(INFO_RESOURCES_SELECTED, len(selected))
                    if len(selected) == 0:
                        continue

                    if not self.keep_tags:
                        for res in selected:
                            if "Tags" in res:
                                del res["Tags"]

                    if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                        task_level_aggregated_resources += selected
                    elif self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                        if self._check_can_execute(selected):
                            # create task actions for account aggregated resources, optionally split in
                            # batch size chunks
                            for r in resource_batches(selected):
                                action_item = actions_tracking.add_task_action(
                                    task=self.task,
                                    assumed_role=assumed_role,
                                    action_resources=r,
                                    task_datetime=self.task_dt,
                                    source=self.source)
                                items.append(action_item)
                                self._logger.info(INFO_ACCOUNT_AGGREGATED,
                                                  action_item[tracking.TASK_TR_ID], len(r),
                                                  self.resource_name, self.task[handlers.TASK_NAME])
                    else:
                        for res in selected:
                            # create a task action for each selected resource
                            action_item = actions_tracking.add_task_action(
                                task=self.task,
                                assumed_role=assumed_role,
                                action_resources=res,
                                task_datetime=self.task_dt,
                                source=self.source)
                            items.append(action_item)
                            self._logger.info(INFO_RESOURCE, action_item[tracking.TASK_TR_ID],
                                              self.resource_name, self.task[handlers.TASK_NAME])

            if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and len(
                    task_level_aggregated_resources) > 0:
                if self._check_can_execute(task_level_aggregated_resources):
                    # create task actions for task aggregated resources, optionally split in batch size chunks
                    for r in resource_batches(task_level_aggregated_resources):
                        action_item = actions_tracking.add_task_action(
                            task=self.task,
                            assumed_role=None,
                            action_resources=r,
                            task_datetime=self.task_dt,
                            source=self.source)
                        items.append(action_item)
                        self._logger.info(INFO_TASK_AGGREGATED, action_item[tracking.TASK_TR_ID], len(r),
                                          self.resource_name, self.task[handlers.TASK_NAME])

        self._logger.info(INFO_ADDED_ITEMS, len(items), self.task[handlers.TASK_NAME])

        running_time = float((datetime.now() - start).total_seconds())
        self._logger.info(INFO_RESULT, running_time)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "dispatched-tasks": items
        })
    finally:
        self._logger.flush()
def tasks_items_to_execute():
    """
    Generator function that selects all record items from the event that need processing
    :return:
    """

    def is_new_action(task_record):
        if task_record["eventName"] == "INSERT":
            return task_record["dynamodb"]["NewImage"].get(tracking.TASK_TR_STATUS).get("S")
        return False

    def is_completed_with_concurrency(task_record):
        if task_record["eventName"] in ["UPDATE", "MODIFY"]:
            new_task_item = task_record["dynamodb"]["NewImage"]
            concurrency_key = new_task_item.get(tracking.TASK_TR_CONCURRENCY_KEY, {}).get("S")
            status = new_task_item.get(tracking.TASK_TR_STATUS, {}).get("S")
            return concurrency_key is not None and status in [tracking.STATUS_COMPLETED,
                                                              tracking.STATUS_FAILED,
                                                              tracking.STATUS_TIMED_OUT]

    def is_wait_for_completion(task_record):
        if task_record["eventName"] in ["UPDATE", "MODIFY"]:
            old_task_item = task_record["dynamodb"]["OldImage"]
            old_status = old_task_item.get(tracking.TASK_TR_STATUS, {}).get("S")
            old_wait_ts = old_task_item.get(tracking.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            new_task_item = task_record["dynamodb"]["NewImage"]
            new_status = new_task_item.get(tracking.TASK_TR_STATUS, {}).get("S")
            new_wait_ts = new_task_item.get(tracking.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            return old_status == tracking.STATUS_WAIT_FOR_COMPLETION \
                and new_status == tracking.STATUS_WAIT_FOR_COMPLETION \
                and old_wait_ts is not None and old_wait_ts != new_wait_ts

    for record in self._event.get("Records"):
        self._logger.debug("Record to process is {}", safe_json(record, indent=2))

        if record.get("eventSource") == "aws:dynamodb":
            if record["eventName"] == "REMOVE":
                continue

            update_to_handle = None
            self._logger.debug_enabled = record["dynamodb"]["NewImage"].get(
                tracking.TASK_TR_DEBUG, {}).get("BOOL", False)

            if is_new_action(record):
                update_to_handle = NEW_TASK
            elif is_completed_with_concurrency(record):
                update_to_handle = FINISHED_CONCURRENY_TASK
            elif is_wait_for_completion(record):
                update_to_handle = CHECK_COMPLETION

            if update_to_handle is not None:
                yield update_to_handle, record
            else:
                self._logger.debug("No action for this record")
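For reference, the records consumed above follow the DynamoDB stream record shape. A minimal sketch of a record that would be classified as a new task action, assuming the tracking.TASK_TR_* constants resolve to attribute names like "Status" and "Debug"; the actual constant values are not shown in these snippets.

# Hypothetical stream record; attribute names are assumptions.
new_task_record = {
    "eventSource": "aws:dynamodb",
    "eventName": "INSERT",
    "dynamodb": {
        "NewImage": {
            "Status": {"S": "pending"},
            "Debug": {"BOOL": False}
        }
    }
}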
def handle_request(self):
    """
    Handles the ec2 instance state change event
    :return: Started tasks, if any, information
    """

    def is_matching_event_state(event_state, ec2event):
        # "*" matches any state, otherwise the state must be in the comma separated list of states
        return ec2event == "*" or event_state in [s.strip() for s in ec2event.split(",")]

    try:
        result = []
        start = datetime.now()

        self._logger.info("Handler {}", self.__class__.__name__)

        state = self._event.get("detail", {}).get("state")
        if state is not None:
            state = state.lower()

        account = self._event["account"]
        region = self._event["region"]
        instance_id = self._event["detail"]["instance-id"]
        dt = self._event["time"]

        task = None
        try:
            # for all tasks in the configuration that handle ec2 state events
            for task in [t for t in TaskConfiguration(context=self._context, logger=self._logger).get_tasks()
                         if t.get("events") is not None
                         and EC2_STATE_EVENT in t["events"]
                         and t.get("enabled", True)]:

                task_name = task["name"]
                ec2_event = task["events"][EC2_STATE_EVENT]

                if not is_matching_event_state(state, ec2_event):
                    continue

                result.append(task_name)
                self._logger.info(INFO_EVENT, task_name, state, instance_id, account, region,
                                  safe_json(task, indent=2))

                # create an event for the lambda function that scans for resources for this task
                event = {
                    handlers.HANDLER_EVENT_ACTION: handlers.HANDLER_ACTION_SELECT_RESOURCES,
                    handlers.HANDLER_SELECT_ARGUMENTS: {
                        handlers.HANDLER_EVENT_REGIONS: [region],
                        handlers.HANDLER_EVENT_ACCOUNT: account,
                        "InstanceIds": [instance_id]
                    },
                    handlers.HANDLER_EVENT_SOURCE: EC2_STATE_EVENT,
                    handlers.HANDLER_EVENT_TASK: task,
                    handlers.HANDLER_EVENT_TASK_DT: dt
                }

                if self._context is not None:
                    # start lambda function to scan for task resources
                    payload = str.encode(safe_json(event))
                    client = get_client_with_retries("lambda", ["invoke"], context=self._context)
                    client.invoke_with_retries(FunctionName=self._context.function_name,
                                               Qualifier=self._context.function_version,
                                               InvocationType="Event",
                                               LogType="None",
                                               Payload=payload)
                else:
                    # if not running in a lambda environment, pass the event to the main task handler directly
                    lambda_handler(event, None)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": (datetime.now() - start).total_seconds(),
                "event-datetime": dt,
                "started-tasks": result
            })
        except ValueError as ex:
            self._logger.error("{}\n{}".format(ex, safe_json(task, indent=2)))
    finally:
        self._logger.flush()
def get_action(name, context=None, log_this_call=True):
    """
    Gets the details of the specified action
    :param name: Name of the action, raises an exception if the action does not exist
    :param context: Lambda context
    :param log_this_call: Switch to enable logging of this call
    :return: Details of the specified action. This dictionary can contain the following attributes:

    Constants used below can be found in actions/__init__.py

    -ACTION_SERVICE: Name of the service of the resources of this action
    -ACTION_RESOURCES: Name of the resources for this action
    -ACTION_AGGREGATION: Possible values are:
        ACTION_AGGREGATION_RESOURCE: resources are not aggregated, execution of the action for each individual
        resource
        ACTION_AGGREGATION_ACCOUNT: resources are aggregated per account, execution of the action for the list
        of resources in that account
        ACTION_AGGREGATION_TASK: resources are aggregated per task, single execution of the action for the list
        of all resources in all accounts
    -ACTION_SELECT_EXPRESSION: Optional, JMES path to map/select attributes and filter resources
    -ACTION_BATCH_SIZE: Optional, batch size for aggregated resources
    -ACTION_PERMISSIONS: Optional, permissions required for the action
    -ACTION_MEMORY: Optional, memory requirement for the lambda function to run the action, default is the size
    of the scheduler lambda function
    -ACTION_CROSS_ACCOUNT: Optional, cross account operations supported by the action, default is True
    -ACTION_EVENT_FILTER: Optional, regex filter for which types of source events are supported by the action,
    default is None (all events)
    -ACTION_TITLE: Optional, title to be used in UI
    -ACTION_DESCRIPTION: Optional, description or url to be used in UI
    -ACTION_AUTHOR: Optional, author of the action
    -ACTION_VERSION: Optional, implementation version of the action
    -ACTION_MULTI_REGION: Optional, True if the action can execute in multiple regions (default)
    -ACTION_INTERNAL: Optional, True if the action can only be used in internal tasks
    -ACTION_PARAM_STACK_RESOURCES: Optional, cloudformation snippet of resources owned and used by the action
    implementation
    -ACTION_STACK_RESOURCES_PERMISSIONS: Optional, list of permissions for the action stack resources
    -ACTION_PARAMETERS: Parameters for the action:
        -PARAM_ALLOWED_VALUES: allowed values for a parameter (optional)
        -PARAM_DEFAULT: default value for a parameter (optional)
        -PARAM_MAX_LEN: max length for a string parameter (optional)
        -PARAM_MAX_VALUE: max value for a numeric parameter (optional)
        -PARAM_MIN_LEN: min length for a string parameter (optional)
        -PARAM_MIN_VALUE: min value for a numeric parameter (optional)
        -PARAM_PATTERN: allowed pattern for a string parameter (optional)
        -PARAM_REQUIRED: True if the parameter is required (default=False)
        -PARAM_TYPE: (Python) type name of a parameter
        -PARAM_DESCRIBE_PARAMETER: name of a parameter if it must be used as a parameter in the describe method
        for a resource
        -PARAM_DESCRIPTION: user readable description for the parameter
        -PARAM_LABEL: label for the parameter
    """
    with _get_logger(context=context) as logger:
        if log_this_call:
            logger.info("get_action")
        all_actions = actions.all_actions()
        if name not in all_actions:
            raise ValueError(ERR_ACTION_DOES_NOT_EXIST.format(name, ",".join(all_actions)))
        return safe_json(actions.get_action_properties(name))
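Since get_action returns the action properties serialized with safe_json, a caller gets JSON text back and must parse it. A hypothetical lookup; the action name is illustrative only.

import json

# Hypothetical usage; "Ec2CopySnapshot" is an illustrative action name.
properties = json.loads(get_action("Ec2CopySnapshot"))
print(sorted(properties.keys()))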
def execute(self, _):
    """
    Executes the logic of the copy snapshot action
    :param _: not used
    :return: Result of starting the snapshot copy and setting the tags on the copy
    """

    def get_tags_for_copied_snapshot():
        snap_shot_tags = (self.copied_volume_tagfiter.pairs_matching_any_filter(
            self.snapshot.get("Tags", {})))
        snap_shot_tags.update(self.snapshot_tags)
        return {tag_key: snap_shot_tags[tag_key]
                for tag_key in snap_shot_tags
                if not (tag_key.startswith("aws:") or tag_key.startswith("cloudformation:"))}

    # logged information
    self.logger.info("{}, version {}", self.properties[ACTION_TITLE], self.properties[ACTION_VERSION])
    self.logger.info(INFO_ACCOUNT_SNAPSHOT, self.source_snapshot_id, self.account, self.source_region,
                     self.destination_region)
    self.logger.debug("Snapshot : {}", self.snapshot)

    # ec2 client for the destination region to create the copy and tag it
    ec2_destination = get_client_with_retries("ec2", ["copy_snapshot", "create_tags"],
                                              region=self.destination_region,
                                              context=self.context,
                                              session=self.session)

    # ec2 client for the source region to set a tag on the source snapshot to mark it as copied
    ec2_source = get_client_with_retries("ec2", ["create_tags"],
                                         region=self.source_region,
                                         context=self.context,
                                         session=self.session)

    boto_call = "copy_snapshot"
    try:
        # setup arguments for the CopySnapshot call
        args = {
            "SourceRegion": self.source_region,
            "SourceSnapshotId": self.source_snapshot_id
        }
        if self.dryrun:
            args["DryRun"] = True
        if self.description != "":
            args["Description"] = self.description

        # start the copy
        resp = ec2_destination.copy_snapshot_with_retries(**args)

        # id of the copy
        copied_snapshot_id = resp.get("SnapshotId")
        self.logger.info(INFO_SNAPSHOT_COPIED, self.source_snapshot_id, self.destination_region,
                         copied_snapshot_id)
        self.result[boto_call] = resp
        self.result["copied-snapshot-id"] = copied_snapshot_id

        # set tags on the copy
        boto_call = "create_tags (target)"
        tags = get_tags_for_copied_snapshot()
        self.logger.info(INFO_CREATE_TAGS, tags)
        snapshot_tags = [{"Key": t, "Value": tags[t]} for t in tags]
        self.result[boto_call] = ec2_destination.create_tags_with_retries(Tags=snapshot_tags,
                                                                          Resources=[copied_snapshot_id])
        self.logger.info(INFO_TAGS_CREATED)

        # set a tag on the source snapshot to mark it as copied
        boto_call = "create_tags (source)"
        source_tags = [{
            "Key": self.marked_as_copied_tag,
            "Value": safe_json({
                "destination-region": self.destination_region,
                "copied-snapshot-id": copied_snapshot_id,
                "copied": datetime.now().isoformat()
            })
        }]
        self.result[boto_call] = ec2_source.create_tags_with_retries(Tags=source_tags,
                                                                     Resources=[self.source_snapshot_id])
    except Exception as ex:
        if self.dryrun:
            self.logger.debug(str(ex))
            self.result[boto_call] = str(ex)
            return self.result
        else:
            raise ex

    self.result[METRICS_DATA] = build_action_metrics(self, CopiedSnapshots=1)

    return safe_json(self.result)