def handle_request(self):
        """
        Handles the select resources request. Creates new actions for resources found for a task
        :return: Results of handling the request
        """
        def filter_by_action_filter(srv, used_role, r):
            filter_method = getattr(self.action_class,
                                    actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                                    None)
            if filter_method is not None:
                r = filter_method(srv, self._logger, self._resource_name, r,
                                  self._context, self.task, used_role)
                if r is None:
                    self._logger.debug(
                        DEBUG_FILTER_METHOD, self.action_class.__name__,
                        actions.SELECT_AND_PROCESS_RESOURCE_METHOD)
                    return None
                else:
                    self._logger.debug(
                        DEBUG_FILTERED_RESOURCE, self.action_class.__name__,
                        actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                        safe_json(r, indent=3))

            return r

        def is_selected_resource(aws_service, resource, used_role, taskname,
                                 tags_filter, does_resource_supports_tags):

            # No tags then just use filter method if any
            if not does_resource_supports_tags:
                self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
                return filter_by_action_filter(srv=aws_service,
                                               used_role=used_role,
                                               r=resource)

            tags = resource.get("Tags", {})

            # name of the tag that holds the list of tasks for this resource
            tagname = self._task_tag

            if tags_filter is None:
                # test if name of the task is in list of tasks in tag value
                if (tagname not in tags) or (taskname
                                             not in tagging.split_task_list(
                                                 tags[tagname])):
                    self._logger.debug(
                        DEBUG_RESOURCE_NOT_SELECTED,
                        safe_json(resource, indent=2), taskname, ','.join(
                            ["'{}'='{}'".format(t, tags[t]) for t in tags]))
                    return None
                self._logger.debug(DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                                   safe_json(resource, indent=2), tagname,
                                   taskname)
            else:
                # using a tag filter, * means any tag
                if tags_filter != tagging.tag_filter_set.WILDCARD_CHAR:
                    # test if there are any tags matching the tag filter
                    if not TagFilterExpression(tags_filter).is_match(tags):
                        self._logger.debug(
                            DEBUG_RESOURCE_NOT_SELECTED_TAG_FILTER,
                            safe_json(resource, indent=2), taskname, ','.join([
                                "'{}'='{}'".format(t, tags[t]) for t in tags
                            ]))
                        return None
                    self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                       safe_json(resource, indent=2), tags,
                                       tag_filter_str, taskname)
                else:
                    self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                                       safe_json(resource, indent=2), taskname)
                    return filter_by_action_filter(srv=aws_service,
                                                   used_role=used_role,
                                                   r=resource)

            return filter_by_action_filter(srv=aws_service,
                                           used_role=used_role,
                                           r=resource)

        def resource_batches(resources):
            """
            Returns resources as chunks of size items. If the class has an optional custom aggregation function then the 
            resources are aggregated first using this function before applying the batch size
            :param resources: resources to process
            :return: Generator for blocks of resource items
            """

            aggregate_func = getattr(self.action_class,
                                     actions.CUSTOM_AGGREGATE_METHOD, None)

            for i in aggregate_func(
                    resources, self.task_parameters,
                    self._logger) if aggregate_func is not None else [
                        resources
                    ]:
                if self.batch_size is None:
                    yield i
                else:
                    first = 0
                    while first < len(i):
                        yield i[first:first + self.batch_size]
                        first += self.batch_size

        def setup_tag_filtering(t_name):
            # get optional tag filter
            no_select_by_tags = self.action_properties.get(
                actions.ACTION_NO_TAG_SELECT, False)
            if no_select_by_tags:
                tag_filter_string = tagging.tag_filter_set.WILDCARD_CHAR
            else:
                tag_filter_string = self.task.get(handlers.TASK_TAG_FILTER)

            # set if only a single task is required for selecting the resources, it is used to optimise the select
            select_tag = None
            if tag_filter_string is None:
                self._logger.debug(DEBUG_SELECT_BY_TASK_NAME,
                                   self._resource_name, self._task_tag, t_name)
                select_tag = self._task_tag
            elif tag_filter_string == tagging.tag_filter_set.WILDCARD_CHAR:
                self._logger.debug(DEBUG_SELECT_ALL_RESOURCES,
                                   self._resource_name)
            else:
                self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES,
                                   self._resource_name)
                # build the tag expression that us used to filter the resources
                tag_filter_expression = TagFilterExpression(tag_filter_string)
                # the keys of the used tags
                tag_filter_expression_tag_keys = list(
                    tag_filter_expression.get_filter_keys())
                # if there is only a single tag then we can optimize by just filtering on that specific tag
                if len(tag_filter_expression_tag_keys) == 1 and \
                        tagging.tag_filter_set.WILDCARD_CHAR not in tag_filter_expression_tag_keys[0]:
                    select_tag = tag_filter_expression_tag_keys[0]
            return select_tag, tag_filter_string

        def add_aggregated(aggregated_resources):
            # create tasks action for aggregated resources , optionally split in batch size chunks
            for ra in resource_batches(aggregated_resources):
                if self._check_can_execute(ra):
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ra,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADDED_AGGREGATED_RESOURCES_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       len(ra), self._resource_name,
                                       self.task[handlers.TASK_NAME])

                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))

                    yield action_item

        def add_as_individual(resources):
            for ri in resources:
                # task action for each selected resource
                if self._check_can_execute([ri]):
                    action_item = self.actions_tracking.add_task_action(
                        task=self.task,
                        assumed_role=assumed_role,
                        action_resources=ri,
                        task_datetime=self.task_dt,
                        source=self.source,
                        task_group=self.task_group)

                    self._logger.debug(DEBUG_ADD_SINGLE_RESOURCE_TASK,
                                       action_item[handlers.TASK_TR_ID],
                                       self._resource_name,
                                       self.task[handlers.TASK_NAME])
                    self._logger.debug("Added item\n{}",
                                       safe_json(action_item, indent=3))
                    yield action_item

        try:
            task_items = []
            start = datetime.now()

            self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=3))
            self._logger.debug(DEBUG_ACTION,
                               safe_json(self.action_properties, indent=3))

            self._logger.info(INFO_SELECTED_RESOURCES, self._resource_name,
                              self.service, self.task[handlers.TASK_NAME])
            self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

            task_level_aggregated_resources = []

            args = self._build_describe_argument()

            service_resource_with_tags = services.create_service(
                self.service).resources_with_tags
            if self._resource_name == "":
                supports_tags = len(service_resource_with_tags) != 0
            else:
                supports_tags = self._resource_name.lower() in [
                    r.lower() for r in service_resource_with_tags
                ]

            args["tags"] = supports_tags
            self._logger.info(INFO_USE_TAGS_TO_SELECT,
                              "R" if supports_tags else "No r")

            task_name = self.task[handlers.TASK_NAME]
            count_resource_items = 0
            selected_resource_items = 0

            select_on_tag, tag_filter_str = setup_tag_filtering(task_name)

            filter_func = getattr(self.action_class,
                                  actions.FILTER_RESOURCE_METHOD, None)

            # timer to guard selection time and log warning if getting close to lambda timeout
            if self._context is not None:
                self.start_timer(REMAINING_TIME_AFTER_DESCRIBE)
            try:

                for assumed_role in self._task_assumed_roles():
                    retry_strategy = get_default_retry_strategy(
                        service=self.service, context=self._context)

                    service = services.create_service(
                        service_name=self.service,
                        service_retry_strategy=retry_strategy,
                        role_arn=assumed_role)

                    if self.is_timed_out():
                        break

                    # contains resources for account
                    account_level_aggregated_resources = []

                    self._logger.info(INFO_ACCOUNT, service.aws_account)
                    if assumed_role not in [None, ""]:
                        self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                    for region in self._regions:

                        # test for timeouts
                        if self.is_timed_out():
                            break

                        # handle region passed in the event
                        if region is not None:
                            args["region"] = region
                        else:
                            if "region" in args:
                                del args["region"]

                        # resources can be passed in the invent by event handlers
                        all_resources = self._event.get(
                            handlers.HANDLER_SELECT_RESOURCES, None)

                        if all_resources is None:

                            # actions can have an optional method to select resources
                            action_custom_describe_function = getattr(
                                self.action_class, "describe_resources", None)
                            if action_custom_describe_function is not None and self.use_custom_select:
                                all_resources = action_custom_describe_function(
                                    service, self.task, region)
                            else:
                                # select resources from the service
                                self._logger.debug(DEBUG_SELECT_PARAMETERS,
                                                   self._resource_name,
                                                   self.service, args)
                                # selecting a list of all resources in this account/region
                                all_resources = list(
                                    service.describe(
                                        self._resource_name,
                                        filter_func=filter_func,
                                        select_on_tag=select_on_tag,
                                        **args))
                            # test for timeout
                            if self.is_timed_out():
                                break

                            count_resource_items += len(all_resources)

                        self._logger.info(INFO_RESOURCES_FOUND,
                                          len(all_resources))

                        # select resources that are processed by the task
                        selected_resources = []
                        for sr in all_resources:
                            sel = is_selected_resource(
                                aws_service=service,
                                resource=sr,
                                used_role=assumed_role,
                                taskname=task_name,
                                tags_filter=tag_filter_str,
                                does_resource_supports_tags=supports_tags)
                            if sel is not None:
                                selected_resources.append(sel)

                        selected_resource_items += len(selected_resources)

                        # display found and selected resources
                        if len(all_resources) > 0:
                            self._logger.info(INFO_RESOURCES_SELECTED,
                                              len(selected_resources))
                            if len(selected_resources) == 0:
                                continue

                        # delete tags if not needed by the action
                        if not self.keep_tags:
                            for res in selected_resources:
                                if "Tags" in res:
                                    del res["Tags"]

                        # add resources to total list of resources for this task
                        if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                            task_level_aggregated_resources += selected_resources

                        # add resources to list of resources for this account
                        if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                            account_level_aggregated_resources += selected_resources

                        # add batch(es) of resources for this region
                        if self.aggregation_level == actions.ACTION_AGGREGATION_REGION and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_aggregated(selected_resources))

                        # no aggregation, add each individual resource
                        if self.aggregation_level == actions.ACTION_AGGREGATION_RESOURCE and len(
                                selected_resources) > 0:
                            task_items += list(
                                add_as_individual(selected_resources))

                    # at the end of the region loop, check if aggregated resources for account need to be added
                    if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT and len(
                            account_level_aggregated_resources) > 0:
                        task_items += list(
                            add_aggregated(account_level_aggregated_resources))

                # at the end of the accounts loop, check if aggregated resources for task need to be added
                if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and len(
                        task_level_aggregated_resources) > 0:
                    task_items += list(
                        add_aggregated(task_level_aggregated_resources))
            except Exception as ex:
                raise_exception(ERR_SELECTING_TASK_RESOURCES,
                                self.task[handlers.TASK_NAME], ex)

            finally:
                if self._timer is not None:
                    # cancel time used avoid timeouts when selecting resources
                    self._timer.cancel()
                    if self.is_timed_out():
                        raise_exception(ERR_TIMEOUT_SELECTING_RESOURCES,
                                        self._resource_name, self.service,
                                        task_name)

                    self.start_timer(REMAINING_TIME_AFTER_STORE)

                    self.actions_tracking.flush(self._timeout_event)
                    if self.is_timed_out():
                        raise_exception(
                            ERR_CREATING_TASKS_FOR_SELECTED_RESOURCES,
                            task_name)
                    self._timer.cancel()
                else:
                    self.actions_tracking.flush()

            self._logger.info(INFO_ADDED_ITEMS, len(task_items),
                              self.task[handlers.TASK_NAME])

            running_time = float((datetime.now() - start).total_seconds())
            self._logger.info(INFO_RESULT, running_time)

            if self.metrics:
                put_task_select_data(task_name=task_name,
                                     items=count_resource_items,
                                     selected_items=selected_resource_items,
                                     logger=self._logger,
                                     selection_time=running_time)

            return safe_dict({
                "datetime": datetime.now().isoformat(),
                "running-time": running_time,
                "dispatched-tasks": task_items
            })

        finally:
            self._logger.flush()
示例#2
0
    def _start_task_execution(self,
                              task_item,
                              action=handlers.HANDLER_ACTION_EXECUTE):
        """
        Creates an instance of the lambda function that executes the tasks action. It first checks is the action has specific memory
        requirements and based on this it creates a copy of this instance or one configured for the required memory. All
        information for executing the action is passed in the event.
        :param task_item: Task item for which action is executed
        :return:
        """

        try:

            self._logger.debug(
                "Entering start_task_execution ({}) with task {}", action,
                safe_json(task_item, indent=3))

            # Create event for execution of the action and set its action so that is picked up by the execution handler
            event = {i: task_item.get(i) for i in task_item}
            event[handlers.HANDLER_EVENT_ACTION] = action

            self._logger.debug(DEBUG_ACTION,
                               task_item[handlers.TASK_TR_ACTION],
                               task_item[handlers.TASK_TR_NAME],
                               task_item[handlers.TASK_TR_ID])

            self._logger.debug(
                DEBUG_ACTION_PARAMETERS,
                safe_json(task_item.get(handlers.TASK_TR_PARAMETERS, {}),
                          indent=3))

            # get memory allocation for executing the task
            lambda_size = handlers.TASK_TR_COMPLETION_SIZE \
                if action == handlers.HANDLER_ACTION_TEST_COMPLETION \
                else handlers.TASK_TR_EXECUTE_SIZE

            execute_lambda_size = task_item.get(lambda_size,
                                                actions.ACTION_SIZE_STANDARD)

            if execute_lambda_size == actions.ACTION_USE_ECS:
                ecs_memory = task_item.get(
                    handlers.TASK_EXECUTE_ECS_MEMORY
                    if action == handlers.HANDLER_ACTION_EXECUTE else
                    handlers.TASK_COMPLETION_ECS_MEMORY, None)
            else:
                ecs_memory = None

            if not handlers.running_local(self._context):

                self._logger.debug(DEBUG_MEMORY_SIZE, execute_lambda_size)

                if execute_lambda_size != actions.ACTION_USE_ECS:

                    # create event payload
                    payload = str.encode(safe_json(event))

                    # determine which lambda to execute on
                    function_name = "{}-{}-{}".format(
                        os.getenv(handlers.ENV_STACK_NAME),
                        os.getenv(handlers.ENV_LAMBDA_NAME),
                        execute_lambda_size)

                    self._logger.debug(
                        "Running execution of task on lambda function {}",
                        function_name)

                    self._logger.debug(DEBUG_LAMBDA_FUNCTION_, function_name,
                                       payload)
                    # start lambda function
                    lambda_client = boto_retry.get_client_with_retries(
                        "lambda", ["invoke"],
                        context=self._context,
                        logger=self._logger)
                    resp = lambda_client.invoke_with_retries(
                        FunctionName=function_name,
                        InvocationType="Event",
                        LogType="None",
                        Payload=payload)

                    task_info = {
                        "id": task_item[handlers.TASK_TR_ID],
                        "task": task_item[handlers.TASK_TR_NAME],
                        "action": task_item[handlers.TASK_TR_ACTION],
                        "payload": payload,
                        "status-code": resp["StatusCode"]
                    }

                    self._logger.debug(DEBUG_LAMBDA,
                                       safe_json(task_info, indent=2))
                    self.invoked_lambda_functions.append(task_info)
                else:
                    # run as ECS job
                    ecs_args = {
                        handlers.HANDLER_EVENT_ACTION: action,
                        handlers.TASK_NAME: task_item[handlers.TASK_TR_NAME],
                        handlers.TASK_TR_ID: task_item[handlers.TASK_TR_ID]
                    }

                    self._logger.debug(DEBUG_RUNNING_ECS_TASK, action,
                                       task_item[handlers.TASK_TR_NAME])
                    handlers.run_as_ecs_job(ecs_args,
                                            ecs_memory_size=ecs_memory,
                                            context=self._context,
                                            logger=self._logger)

            else:
                lambda_handler(event, self._context)

            ResultNotifications(context=self._context,
                                logger=self._logger).publish_started(task_item)

        except Exception as ex:
            self._logger.error(ERR_RUNNING_TASK, task_item, str(ex),
                               full_stack())
    def is_completed(self, snapshot_create_data):

        def grant_create_volume_permissions(snap_ids):

            if self.accounts_with_create_permissions is not None and len(self.accounts_with_create_permissions) > 0:

                args = {

                    "CreateVolumePermission": {
                        "Add": [{"UserId": a.strip()} for a in self.accounts_with_create_permissions]
                    }
                }
                for snapshot_id in snap_ids:
                    args["SnapshotId"] = snapshot_id

                    try:
                        self.ec2_client.modify_snapshot_attribute_with_retries(**args)
                        self._logger_.info(INFO_SETTING_CREATE_VOLUME_PERMISSIONS, ", ".join(self.accounts_with_create_permissions))
                        self.result["create-volume-access-accounts"] = [a.strip() for a in self.accounts_with_create_permissions]
                    except Exception as ex:
                        raise_exception(ERR_SETTING_CREATE_VOLUME_PERMISSIONS, self.accounts_with_create_permissions, ex)

        def tag_shared_snapshots(snapshot_data, snap_ids):
            if self.accounts_with_create_permissions not in ["", None] and self.tag_shared_snapshots:

                for account in self.accounts_with_create_permissions:

                    session_for_tagging = self.get_action_session(account=account,
                                                                  param_name=PARAM_SHARED_ACCOUNT_TAGGING_ROLENAME,
                                                                  logger=self._logger_)

                    if session_for_tagging is None:
                        self._logger_.error(ERR_TAGS_NOT_SET_IN_ACCOUNT, account)
                        continue

                    try:
                        ec2_client = get_client_with_retries(service_name="ec2",
                                                             methods=[
                                                                 "create_tags",
                                                                 "delete_tags"
                                                             ],
                                                             context=self._context_,
                                                             region=self._region_,
                                                             session=session_for_tagging,
                                                             logger=self._logger_)
                        for snap_id in snap_ids:
                            tags = snapshot_data.get(snap_id, {}).get("tags", None)
                            if tags is not None:
                                self._logger_.info(INFO_SET_SNAPSHOT_TAGS_SHARED, safe_json(tags, indent=3), snap_id, account,
                                                   self._region_)
                                tagging.set_ec2_tags(ec2_client=ec2_client,
                                                     resource_ids=[snap_id],
                                                     tags=tags,
                                                     logger=self._logger_)
                    except Exception as ex:
                        raise Exception(ERR_SETTING_SHARED_TAGS.format(account, str(ex)))

        def set_volume_tags(volume_id, snap_id):
            tags = self.build_tags_from_template(parameter_name=PARAM_VOLUME_TAGS,
                                                 tag_variables={
                                                     TAG_PLACEHOLDER_VOLUME_SNAPSHOT: snap_id
                                                 })

            if len(tags) > 0:

                try:
                    tagging.set_ec2_tags(ec2_client=self.ec2_client,
                                         resource_ids=[volume_id],
                                         tags=tags,
                                         logger=self._logger_)

                    self._logger_.info(INFO_SET_VOLUME_TAGS, safe_json(tags, indent=3), volume_id)
                except Exception as ex:
                    raise Exception(ERR_SETTING_VOLUME_TAGS.format(self.instance_id, ex))

        def set_instance_tags(snap_ids):
            tags = self.build_tags_from_template(parameter_name=PARAM_INSTANCE_TAGS,
                                                 tag_variables={
                                                     TAG_PLACEHOLDER_INSTANCE_SNAPSHOTS: ','.join(sorted(snap_ids))
                                                 })
            if len(tags) > 0:
                try:
                    self.set_ec2_instance_tags_with_event_loop_check(instance_ids=[self.instance_id],
                                                                     tags_to_set=tags,
                                                                     client=self.ec2_client,
                                                                     region=self._region_)

                    self._logger_.info(INFO_SET_INSTANCE_TAGS, safe_json(tags, indent=3), self.instance_id)
                except Exception as ex:
                    raise Exception(ERR_SETTING_INSTANCE_TAGS.format(self.instance_id, ex))

        snapshot_ids = [volume.get("create_snapshot", {}).get("SnapshotId") for volume in
                        list(snapshot_create_data.get("volumes", {}).values())]

        self._logger_.info(INFO_CHECKING_SNAPSHOT_STATUS, ",".join(snapshot_ids))

        if len(snapshot_ids) == 0:
            return {
                "InstanceId": snapshot_create_data["instance"],
                "Volumes": []
            }

        # create service instance to test is snapshots are available
        ec2 = services.create_service("ec2", session=self._session_,
                                      service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))

        # test if the snapshot with the ids that were returned from the CreateSnapshot API call exists and are completed
        snapshots = list(ec2.describe(services.ec2_service.SNAPSHOTS,
                                      OwnerIds=["self"],
                                      region=self.instance["Region"],
                                      Filters=[
                                          {
                                              "Name": "snapshot-id", "Values": snapshot_ids
                                          }
                                      ]))

        if len(snapshots) != len(snapshot_ids):
            # allow 5 minutes to all snapshots to appear
            start_time = dateutil.parser.parse(snapshot_create_data["start-time"])
            if self._datetime_.now() - start_time < timedelta(minutes=5):
                self._logger_.info(INFO_NOT_ALL_IN_PROGRESS)
                return None

        test_result = {
            "InstanceId": snapshot_create_data["instance"],
            "Volumes": [{
                "VolumeId": s["VolumeId"],
                "SnapshotId": s["SnapshotId"],
                "State": s["State"],
                "Progress": s["Progress"]
            } for s in snapshots]
        }

        self._logger_.info(INFO_STATE_SNAPSHOTS, safe_json(test_result, indent=3))

        # wait until all snapshot are no longer pending
        for volume in test_result["Volumes"]:
            if volume["State"] == SNAPSHOT_STATE_PENDING:
                self._logger_.info(INFO_CREATION_PENDING)
                return None

        # collect possible failed snapshots
        failed = []
        for volume in test_result["Volumes"]:
            if volume["State"] == SNAPSHOT_STATE_ERROR:
                failed.append(volume)

        if len(failed) > 0:
            s = ",".join([ERR_FAILED_SNAPSHOT.format(volume["SnapshotId"], volume["VolumeId"]) for volume in failed])
            raise Exception(s)

        if len(snapshot_ids) != len(snapshots):
            created_snapshots = [s["SnapshotId"] for s in snapshots]
            raise Exception(ERR_MISSING_SNAPSHOTS.format(",".join([s for s in snapshot_ids if s not in created_snapshots])))

        snapshot_ids = [s["SnapshotId"] for s in snapshots]
        # set tags on source instance
        set_instance_tags(snapshot_ids)

        for s in snapshots:
            set_volume_tags(volume_id=s["VolumeId"], snap_id=s["SnapshotId"])

        # set permissions to create volumes from snapshots
        grant_create_volume_permissions(snapshot_ids)
        # tag resources in accounts the snapshots are shared with
        tag_shared_snapshots(snapshot_create_data.get("snapshots", {}), snapshot_ids)
        self._logger_.info(INFO_COMPLETED)
        return test_result
def run_as_ecs_job(args, ecs_memory_size, context=None, logger=None):
    """
    Runs a teak step as ecs task
    :param args: ecs task parameters
    :param ecs_memory_size: reserved memory size for ecs task container
    :param context: lambda context
    :param logger: logger
    :return: result of ecs task submission
    """
    start_time = time.time()
    start_task_timeout = 300

    def timed_out_no_context(next_wait):
        return (time.time() - start_time) > (start_task_timeout - next_wait)

    def timed_out_by_lambda_timeout(next_wait):
        if context is None:
            return False

        context_seconds_left = context.get_remaining_time_in_millis() * 1000
        return context_seconds_left < (5 + next_wait)

    runner_args = copy.deepcopy(args)

    ecs_client = boto_retry.get_client_with_retries("ecs", ["run_task"], context=context)
    stack_name = os.getenv(ENV_STACK_NAME)

    runner_args["stack"] = stack_name
    runner_args["stack_region"] = ecs_client.meta.region_name

    ecs_params = {
        "cluster": os.getenv(ENV_ECS_CLUSTER),
        "taskDefinition": os.getenv(ENV_ECS_TASK),
        "startedBy": "{}:{}".format(stack_name, args[TASK_NAME])[0:35],
        "overrides": {
            "containerOverrides": [
                {
                    "name": "ops-automator",
                    "command": ["python", "ops-automator-ecs-runner.py", safe_json(runner_args)],
                    "memoryReservation": int(ecs_memory_size if ecs_memory_size is not None else ECS_DEFAULT_MEMORY_RESERVATION)
                }
            ],
        },
    }

    for wait_until_next_retry in boto_retry.LinearWaitStrategy(start=5, incr=5, max_wait=30, random_factor=0.50):

        # try to start task
        resp = ecs_client.run_task_with_retries(**ecs_params)

        # test if task was started
        if len(resp["tasks"]) != 0:
            if logger is not None:
                logger.info("{} executed as ECS job:{}\n", args[HANDLER_EVENT_ACTION],
                            safe_json(resp.get("tasks", []), indent=3))
            return resp

        # investigate failures, note that no exceptions are raised if tasks fails because of insufficient resources in cluster
        failures = resp.get("failures", [])

        # test for other failures than not enough memory resources on cluster instances
        # and test if there is time left for another retry based on on Lambda timeout or fixed timeout when not running in Lambda
        if not all([f["reason"] == "RESOURCE:MEMORY" for f in resp["failures"]]) or \
                (timed_out_by_lambda_timeout(next_wait=wait_until_next_retry) or
                 timed_out_no_context(next_wait=wait_until_next_retry)):
            raise_exception(ERR_FAILED_TO_START_ECS_TASK, safe_json(args), safe_json(failures, indent=3))
        else:
            # sleep until nxt retry
            time.sleep(wait_until_next_retry)
    def _handle_test_task_completion(self):
        def completion_timed_out():
            """
            Function is called when the handling of the request times out
            :return:
            """
            time_used = int(
                os.getenv(handlers.ENV_LAMBDA_TIMEOUT) -
                self._context.get_remaining_time_in_millis() / 1000)
            self._logger.warning(WARN_COMPLETION_CHECK_TIMEOUT, time_used)

            if self.action_properties.get(actions.ACTION_COMPLETION_SIZE,
                                          None) is not None:
                self._logger.warning(
                    WARN_ADJUST_LAMBDA_MEMORY_SETTINGS_COMPLETION, time_used,
                    self.task)

            self._timeout_event.set()

            self._logger.flush()
            if self._timer is not None:
                self._timer.cancel()

        execution_time = int(time.time()) - self.started_at
        execution_time_str = str(timedelta(seconds=execution_time))

        result_data = {
            "task": self.task,
            "action": self.action,
            "id": self.action_id,
            "datetime": datetime.now().isoformat(),
            "running-time": execution_time
        }

        if self._context is not None:
            execution_time_left = (
                self._context.get_remaining_time_in_millis() /
                1000.00) - REMAINING_COMPLETION_CHECK
            self._timer = threading.Timer(execution_time_left,
                                          completion_timed_out)
            self._timer.start()

        try:
            # make one more check for completion before testing for timeout
            check_result = self._action_instance.is_completed(
                self.start_result)
        finally:
            if self._timer is not None:
                self._timer.cancel()
                if self._timeout_event.is_set():
                    raise Exception("Task completion check timed out")

        if check_result is not None:

            self._action_tracking.update_task(
                action_id=self.action_id,
                task=self.task,
                task_metrics=self.metrics,
                status=handlers.STATUS_COMPLETED,
                status_data={
                    handlers.TASK_TR_RESULT: check_result,
                    handlers.TASK_TR_EXECUTION_TIME: str(execution_time)
                })

            self._logger.info(INF_TASK_COMPLETED,
                              safe_json(check_result, indent=3),
                              execution_time_str)

            result_data.update({
                "result": handlers.STATUS_COMPLETED,
                "action-result": str(check_result)
            })

        elif execution_time > self.timeout:
            self._action_tracking.update_task(
                action_id=self.action_id,
                task=self.task,
                task_metrics=self.metrics,
                status=handlers.STATUS_TIMED_OUT,
                status_data={
                    handlers.TASK_TR_EXECUTION_TIME: str(execution_time)
                })

            self._logger.error(ERR_TASK_TIMEOUT, execution_time_str)

            result_data.update({"result": handlers.STATUS_TIMED_OUT})

            return result_data

        else:

            self._logger.info(INF_ACTION_NOT_COMPLETED, execution_time_str)
            result_data.update({"result": handlers.STATUS_WAIT_FOR_COMPLETION})

        return safe_dict(result_data)
示例#6
0
    def add_task_action(self,
                        task,
                        assumed_role,
                        action_resources,
                        task_datetime,
                        source,
                        task_group=None):

        item = {
            handlers.TASK_TR_ID: str(uuid.uuid4()),
            handlers.TASK_TR_NAME: task[handlers.TASK_NAME],
            handlers.TASK_TR_ACTION: task[handlers.TASK_ACTION],
            handlers.TASK_TR_CREATED: datetime.now().isoformat(),
            handlers.TASK_TR_CREATED_TS: int(time()),
            handlers.TASK_TR_SOURCE: source,
            handlers.TASK_TR_DT: task_datetime,
            handlers.TASK_TR_STATUS: handlers.STATUS_PENDING,
            handlers.TASK_TR_DEBUG: task[handlers.TASK_DEBUG],
            handlers.TASK_TR_NOTIFICATIONS: task[handlers.TASK_NOTIFICATIONS],
            handlers.TASK_TR_METRICS: task[handlers.TASK_METRICS],
            handlers.TASK_TR_DRYRUN: task[handlers.TASK_DRYRUN],
            handlers.TASK_TR_INTERNAL: task[handlers.TASK_INTERNAL],
            handlers.TASK_TR_INTERVAL: task[handlers.TASK_INTERVAL],
            handlers.TASK_TR_TIMEZONE: task[handlers.TASK_TIMEZONE],
            handlers.TASK_TR_TIMEOUT: task[handlers.TASK_TIMEOUT],
            handlers.TASK_TR_STARTED_TS: int(time()),
            handlers.TASK_TR_EXECUTE_SIZE: task[handlers.TASK_EXECUTE_SIZE],
            handlers.TASK_TR_SELECT_SIZE: task[handlers.TASK_SELECT_SIZE],
            handlers.TASK_TR_EVENTS: task.get(handlers.TASK_EVENTS, {}),
            handlers.TASK_TR_COMPLETION_SIZE:
            task[handlers.TASK_COMPLETION_SIZE],
            handlers.TASK_TR_TAGFILTER: task[handlers.TASK_TAG_FILTER],
            handlers.TASK_TR_GROUP: task_group,
            handlers.TASK_TR_SERVICE: task[handlers.TASK_SERVICE],
            handlers.TASK_TR_RESOURCE_TYPE: task[handlers.TASK_RESOURCE_TYPE]
        }

        item[handlers.TASK_TR_RUN_LOCAL] = self._run_local

        if assumed_role is not None:
            item[handlers.TASK_TR_ASSUMED_ROLE] = assumed_role
            item[handlers.TASK_TR_ACCOUNT] = services.account_from_role_arn(
                assumed_role)
        else:
            item[handlers.TASK_TR_ACCOUNT] = self.account

        if len(task[handlers.TASK_PARAMETERS]) > 0:
            item[handlers.TASK_TR_PARAMETERS] = task[handlers.TASK_PARAMETERS]

        parameters = item.get(handlers.TASK_TR_PARAMETERS, None)
        if parameters is not None:
            item[handlers.TASK_TR_PARAMETERS] = parameters

        # check if the class has a field or static method that returns true if the action class needs completion
        # this way we can make completion dependent of parameter values
        has_completion = getattr(
            actions.get_action_class(task[handlers.TASK_ACTION]),
            actions.ACTION_PARAM_HAS_COMPLETION, None)
        if has_completion is not None:
            # if it is static method call it passing the task parameters
            if isinstance(has_completion, types.FunctionType):
                has_completion = has_completion(parameters)
        else:
            # if it does not have this method test if the class has an us_complete method
            has_completion = getattr(
                actions.get_action_class(task[handlers.TASK_ACTION]),
                handlers.COMPLETION_METHOD, None) is not None

        item[handlers.TASK_TR_HAS_COMPLETION] = has_completion

        resource_data_str = safe_json(action_resources)

        if len(resource_data_str) < int(
                os.getenv(handlers.ENV_RESOURCE_TO_S3_SIZE, 16)) * 1024:
            item[handlers.TASK_TR_RESOURCES] = as_dynamo_safe_types(
                action_resources)
        else:
            bucket = os.getenv(handlers.ENV_RESOURCE_BUCKET)
            key = "{}.json".format(item[handlers.TASK_TR_ID])

            try:
                self.s3_client.put_object_with_retries(Body=resource_data_str,
                                                       Bucket=bucket,
                                                       Key=key)
            except Exception as ex:
                raise_exception(ERR_WRITING_RESOURCES, bucket, key,
                                item[handlers.TASK_TR_ID], ex)
            item[handlers.TASK_TR_S3_RESOURCES] = True

        self._new_action_items.append(item)

        return item
        def tasks_items_to_execute():
            """
            Generator function that selects all record items from the event that need processing.
            :return:
            """
            def table_name(rec):
                source_arn = rec["eventSourceARN"]
                return source_arn.split("/")[1]

            def from_tracking_table(rec):
                return table_name(rec) == os.getenv(
                    handlers.ENV_ACTION_TRACKING_TABLE)

            def from_concurrency_table(rec):
                return table_name(rec) == os.getenv(
                    handlers.ENV_CONCURRENCY_TABLE)

            def get_old_image(task_record):
                return task_record["dynamodb"].get("OldImage", {})

            def get_new_image(task_record):
                return task_record["dynamodb"].get("NewImage", {})

            def get_new_status(task_record):
                return get_new_image(task_record).get(handlers.TASK_TR_STATUS,
                                                      {}).get("S")

            def get_old_status(task_record):
                return get_new_image(task_record).get(handlers.TASK_TR_STATUS,
                                                      {}).get("S")

            def is_task_tracking_table_update(task_record):
                if not from_tracking_table(task_record):
                    return False
                return task_record["eventName"] in ["UPDATE", "MODIFY"]

            def is_task_done(task_record):

                if not is_task_tracking_table_update(task_record):
                    return False

                new_status = get_new_status(task_record)
                old_status = get_old_status(task_record)

                if old_status != new_status:
                    return False
                return new_status in handlers.task_tracking_table.NOT_LONGER_ACTIVE_STATUSES

            def is_task_with_concurrency(task_record):
                return get_new_image(task_record).get(
                    handlers.TASK_TR_CONCURRENCY_KEY, {}).get("S") is not None

            def get_old_last_update(task_record):
                return get_old_image(task_record).get(
                    handlers.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            def get_new_last_update(task_record):
                return get_new_image(task_record).get(
                    handlers.TASK_TR_LAST_WAIT_COMPLETION, {}).get("S")

            def is_delete_task(task_record):
                return from_tracking_table(
                    r) and task_record["eventName"] == "REMOVE"

            def is_new_task(task_record):
                if from_tracking_table(
                        r) and task_record["eventName"] == "INSERT":
                    return get_new_status(
                        task_record) == handlers.STATUS_PENDING
                return False

            def is_completed_with_concurrency(task_record):
                return is_task_done(task_record) and is_task_with_concurrency(
                    task_record)

            def is_completed_without_concurrency(task_record):

                return is_task_done(
                    task_record) and not is_task_with_concurrency(task_record)

            def is_wait_for_completion(task_record):

                if not is_task_tracking_table_update(task_record):
                    return False

                if get_old_status(task_record) != handlers.STATUS_WAIT_FOR_COMPLETION or \
                        get_new_status(task_record) != handlers.STATUS_WAIT_FOR_COMPLETION:
                    return False

                return get_old_last_update(task_record) != get_new_last_update(
                    task_record)

            def is_concurrency_task_completed(concurrency_record):
                if not from_concurrency_table(concurrency_record):
                    return False

                if concurrency_record["eventName"] == "REMOVE":
                    return False

                return concurrency_record["dynamodb"].get("NewImage", {}).get(
                    "RunNext", {}).get("BOOL", False)

            def get_action_type(rec):

                if is_new_task(rec):
                    return NEW_TASK

                if is_completed_without_concurrency(rec):
                    return FINISHED_TASK

                if is_completed_with_concurrency(rec):
                    return FINISHED_CONCURRENCY_TASK

                if is_wait_for_completion(rec):
                    return CHECK_COMPLETION

                if is_delete_task(rec):
                    return DELETE_ITEM

                if is_concurrency_task_completed(rec):
                    return START_WAITING_ACTION

                return None

            for r in self._event.get("Records"):

                self._logger.debug("Record to process is {}",
                                   safe_json(r, indent=2))

                if r.get("eventSource") == "aws:dynamodb":

                    image_used = "NewImage" if "NewImage" in r[
                        "dynamodb"] else "OldImage"

                    if r["dynamodb"].get("NewImage", {}).get(handlers.TASK_TR_ACTION) is None and \
                            r["dynamodb"].get("OldImage", {}).get(handlers.TASK_TR_ACTION) is not None:
                        continue

                    self._logger.debug_enabled = r["dynamodb"][image_used].get(
                        handlers.TASK_TR_DEBUG, {}).get("BOOL", False)

                    update_to_handle = get_action_type(r)

                    if update_to_handle is not None:
                        yield update_to_handle, r
                    else:
                        self._logger.debug("No action for record")
    def _handle_task_execution(self):
        def execute_timed_out():
            """
            Function is called when the handling of the request times out
            :return:
            """
            time_used = int(
                int(os.getenv(handlers.ENV_LAMBDA_TIMEOUT)) -
                self._context.get_remaining_time_in_millis() / 1000)
            self._logger.error(ERR_EXECUTION_NOT_COMPLETED, time_used)

            if self.action_properties.get(actions.ACTION_EXECUTE_SIZE,
                                          None) is not None:
                self._logger.error(ERR_TIMEOUT, self.task)

            self._timeout_event.set()
            self._logger.flush()
            self._timer.cancel()

        def handle_metrics(result):
            self._logger.info(
                INF_SENDING_METRICS_DATA,
                "enabled" if allow_send_metrics() else "disabled")
            if allow_send_metrics():
                try:
                    result_data = result if isinstance(
                        result, dict) else json.loads(result)
                    if actions.METRICS_DATA in result_data:
                        send_metrics_data(
                            metrics_data=result_data[actions.METRICS_DATA],
                            logger=self._logger)
                except Exception as ex:
                    self._logger.warning(WARN_METRICS_DATA, str(ex))

        self._logger.info(INF_ACTION, self.action, self.action_id, self.task,
                          safe_json(self.action_parameters, indent=3))
        if not handlers.running_local(self._context):
            self._logger.info(INF_LAMBDA_MEMORY, self._context.function_name,
                              self._context.memory_limit_in_mb)

        self._logger.debug("Setting task state to {}", handlers.STATUS_STARTED)
        self._action_tracking.update_task(self.action_id,
                                          self.task,
                                          task_metrics=self.metrics,
                                          status=handlers.STATUS_STARTED)

        start = time.time()

        return_data = {
            "task": self.task,
            "action": self.action,
            "id": self.action_id,
            "dryrun": self.dryrun,
        }

        if self._context is not None:
            execution_time_left = (
                self._context.get_remaining_time_in_millis() /
                1000.00) - EXECUTE_TIME_REMAINING
            self._timer = threading.Timer(execution_time_left,
                                          execute_timed_out)
            self._timer.start()

        try:
            self._logger.debug("Start executing task")
            action_result = self._action_instance.execute()
            if isinstance(action_result, str):
                action_result = json.loads(action_result)
        finally:
            if self._timer is not None:
                self._timer.cancel()
                if self._timeout_event.is_set():
                    raise Exception("Timeout execution action")

        if not self._action_instance.properties.get(actions.ACTION_INTERNAL,
                                                    False):
            handle_metrics(action_result)

        execution_time = int(time.time() - start)
        self._logger.debug("Task needs{}completion",
                           " no" if not self.has_completion else " ")
        if not self.has_completion or self.dryrun:

            self._logger.debug("Setting state of task to {} ",
                               handlers.STATUS_COMPLETED)
            self._action_tracking.update_task(
                action_id=self.action_id,
                task=self.task,
                task_metrics=self.metrics,
                status=handlers.STATUS_COMPLETED,
                status_data={
                    handlers.TASK_TR_STARTED_TS: int(start),
                    handlers.TASK_TR_RESULT: action_result,
                    handlers.TASK_TR_EXECUTION_TIME: str(execution_time),
                    handlers.TASK_TR_EXECUTION_LOGSTREAM:
                    self.execution_log_stream
                })
            # noinspection PyBroadException
            try:
                self._logger.info(INF_ACTION_RESULT, execution_time,
                                  safe_json(action_result, indent=3))
            except Exception:
                self._logger.info(INF_ACTION_RESULT, execution_time,
                                  str(action_result))

        else:
            # the action has a method for testing completion of the task, set the status to waiting and store the result
            # of the execution that started the action as start result that will be passed to the completion method together
            self._logger.debug("Setting state of task to {} ",
                               handlers.STATUS_WAIT_FOR_COMPLETION)
            self._action_tracking.update_task(
                action_id=self.action_id,
                task=self.task,
                task_metrics=self.metrics,
                status=handlers.STATUS_WAIT_FOR_COMPLETION,
                status_data={
                    handlers.TASK_TR_LAST_WAIT_COMPLETION:
                    datetime.now().isoformat(),
                    handlers.TASK_TR_STARTED_TS:
                    int(start),
                    handlers.TASK_TR_START_RESULT:
                    action_result,
                    handlers.TASK_TR_START_EXECUTION_TIME:
                    str(execution_time),
                    handlers.TASK_TR_EXECUTION_LOGSTREAM:
                    self.execution_log_stream
                })

            self._logger.info(INF_STARTED_AND_WAITING_FOR_COMPLETION,
                              safe_json(action_result, indent=3))

            if not handlers.running_local(self._context):
                rule = handlers.enable_completion_cloudwatch_rule(
                    self._context)
                self._logger.info(INF_RULE_ENABLED, rule)
            else:
                self._logger.info(INF_SIMULATION_MODE_NO_RULE_ENABLED)

        # no exception from action
        return_data.update({
            "result":
            handlers.STATUS_WAIT_FOR_COMPLETION
            if self.has_completion else handlers.STATUS_COMPLETED,
            "action-result":
            str(action_result),
            "datetime":
            datetime.now().isoformat(),
            "running-time":
            str(execution_time),
            "task-group":
            self._event[handlers.TASK_TR_GROUP],
            "task-id":
            self._event[handlers.TASK_TR_ID]
        })

        return safe_dict(return_data)
示例#9
0
        def validate_new_instance():

            replaced_state = self.ec2.get_instance(
                instance_id=self.replaced_instance_id)["State"]["Code"] & 0xFF

            if self.new_instance_id is None:

                if expected_new_type != self.replaced_instance[
                        "InstanceType"] or replace_if_same_type:
                    self.fail("Instance was expected to be replaced")

                if replaced_state in [
                        replace_instance.EC2_STATE_TERMINATED,
                        replace_instance.EC2_STATE_SHUTTING_DOWN,
                        replace_instance.EC2_STATE_STOPPING
                ]:
                    self.fail(
                        "Instance was {} not replaced but is not longer running"
                        .format(self.replaced_instance_id))

                self.logger.test("[X] Instance was not replaced as expected")
                return True

            if replaced_state == replace_instance.EC2_STATE_RUNNING:
                self.fail("Replaced instance {} is still running".format(
                    self.replaced_instance_id))

            replaced_instance_checked_data = jmespath.search(
                JMESPATH_CHECKED_INSTANCE_DATA, self.replaced_instance)
            new_instance_checked_data = jmespath.search(
                JMESPATH_CHECKED_INSTANCE_DATA, self.new_instance)
            if replaced_instance_checked_data != new_instance_checked_data:
                self.fail("Instance data is {}, expected {}".format(
                    safe_json(new_instance_checked_data, indent=3),
                    safe_json(replaced_instance_checked_data, indent=3)))

            # instance id must be different if the type was not the same or replaced if same type
            if self.replaced_instance_id == self.new_instance_id:
                if not same_type or replace_if_same_type:
                    self.fail(
                        "InstanceId {} op new instance should not be the same as Id of replaces instance"
                        .format(self.replaced_instance_id))

            new_instance_state = self.new_instance["State"]["Code"] & 0xFF
            if stopped_instance:
                if not replace_instance.Ec2ReplaceInstanceAction.is_in_stopping_or_stopped_state(
                        new_instance_state):
                    self.fail(
                        "New Instance {} is not in stopping or stopped state, {}"
                        .format(self.new_instance_id, new_instance_state))

            else:
                if replace_instance.EC2_STATE_RUNNING != new_instance_state:
                    self.fail("New instance {} is not in running, {}".format(
                        self.new_instance_id, new_instance_state))

            check_load_balancer_registrations()

            instance_placeholders = {
                replace_instance.TAG_PLACEHOLDER_ORG_INSTANCE_ID:
                self.replaced_instance_id,
                replace_instance.TAG_PLACEHOLDER_NEW_INSTANCE_TYPE:
                self.new_instance["InstanceType"],
                replace_instance.TAG_PLACEHOLDER_ORG_INSTANCE_TYPE:
                self.start_type
            }

            new_instance_tags = self.new_instance.get("Tags", {})
            if not testing.tags.verify_placeholder_tags(
                    new_instance_tags, instance_placeholders, exclude_tags=["tag-" + tagging.TAG_VAL_TASK]) or \
                    new_instance_tags.get("Name") != test_method or \
                    new_instance_tags.get("scaling", None) is not None:
                self.fail("Expected tags not set on new instance")

            time.sleep(15)
            check_instance_volumes_tags()

            checked_type = expected_new_type if expected_new_type is not None else \
                self.get_new_instance_types(self.replaced_instance_id, same_type)[0]
            if self.new_instance["InstanceType"] != checked_type:
                self.fail(
                    "Type of instance {} is {} but expected type {}".format(
                        self.new_instance_id,
                        self.new_instance["InstanceType"], checked_type))
    def handle_request(self, use_custom_select=True):
        """
        Handled the cloudwatch rule timer event
        :return: Started tasks, if any, information
        """
        try:

            self._logger.info("Handling CloudWatch event {}",
                              safe_json(self._event, indent=3))

            result = []
            start = datetime.now()

            dt = self._event_time()
            config_task = None

            source_resource_tags = None

            try:

                # for all events tasks in configuration
                for config_task in TaskConfiguration(
                        context=self._context,
                        logger=self._logger).get_tasks():

                    self._logger.debug_enabled = config_task.get(
                        handlers.TASK_DEBUG, False)

                    if not self._event_triggers_task(task=config_task):
                        continue

                    # tasks that can react to events with a wider resource scope than the actual resource causing the event may
                    # have a filter that can is used to filter based on the tags of the resource
                    event_source_tag_filter = config_task.get(
                        handlers.TASK_EVENT_SOURCE_TAG_FILTER, None)
                    if event_source_tag_filter is not None:
                        if source_resource_tags is None:
                            # get the tags for the source resource of the event
                            session = services.get_session(
                                self._role_executing_triggered_task,
                                logger=self._logger)
                            if session is None:
                                self._logger.error(
                                    ERR_NO_SESSION_FOR_GETTING_TAGS)
                                continue
                            try:
                                source_resource_tags = self._source_resource_tags(
                                    session, config_task)
                            except Exception as ex:
                                self._logger.error(
                                    ERR_GETTING_EVENT_SOURCE_RESOURCE_TAGS, ex)
                                continue

                            self._logger.debug(
                                "Tags for event source resource are  {}",
                                source_resource_tags)

                        # apply filter to source resource tags
                        if not TagFilterExpression(
                                event_source_tag_filter).is_match(
                                    source_resource_tags):
                            self._logger.debug(
                                "Tags of source resource do not match tag filter {}",
                                event_source_tag_filter)
                            continue

                    task_name = config_task[handlers.TASK_NAME]
                    result.append(task_name)

                    select_parameters = self._select_parameters(
                        self._event_name(), config_task)
                    if select_parameters is None:
                        continue

                    self._logger.debug(DEBUG_EVENT, task_name,
                                       self._event_name(), select_parameters,
                                       self._event_account(),
                                       self._event_region(),
                                       safe_json(config_task, indent=3))

                    # create an event for lambda function that scans for resources for this task
                    lambda_event = {
                        handlers.HANDLER_EVENT_ACTION:
                        handlers.HANDLER_ACTION_SELECT_RESOURCES,
                        handlers.HANDLER_EVENT_CUSTOM_SELECT:
                        use_custom_select,
                        handlers.HANDLER_SELECT_ARGUMENTS: {
                            handlers.HANDLER_EVENT_REGIONS:
                            [self._event_region()],
                            handlers.HANDLER_EVENT_ACCOUNT:
                            self._event_account(),
                            handlers.HANDLER_EVENT_RESOURCE_NAME:
                            config_task[handlers.TASK_RESOURCE_TYPE],
                        },
                        handlers.HANDLER_EVENT_SOURCE:
                        "{}:{}:{}".format(self._handled_event_source,
                                          self._handled_detail_type,
                                          self._event_name()),
                        handlers.HANDLER_EVENT_TASK:
                        config_task,
                        handlers.HANDLER_EVENT_TASK_DT:
                        dt
                    }

                    for i in select_parameters:
                        lambda_event[handlers.HANDLER_SELECT_ARGUMENTS][
                            i] = select_parameters[i]

                    if self._event_resources() is not None:
                        self._logger.debug(
                            DEBUG_EVENT_RESOURCES,
                            safe_json(self._event_resources(), indent=3))
                        lambda_event[
                            handlers.
                            HANDLER_SELECT_RESOURCES] = self._event_resources(
                            )

                    if not handlers.running_local(self._context):
                        # start lambda function to scan for task resources
                        payload = str.encode(safe_json(lambda_event))
                        client = get_client_with_retries("lambda", ["invoke"],
                                                         context=self._context,
                                                         logger=self._logger)
                        client.invoke_with_retries(
                            FunctionName=self._context.function_name,
                            InvocationType="Event",
                            LogType="None",
                            Payload=payload)
                    else:
                        # or if not running in lambda environment pass event to main task handler
                        lambda_handler(lambda_event, None)

                return safe_dict({
                    "datetime":
                    datetime.now().isoformat(),
                    "running-time": (datetime.now() - start).total_seconds(),
                    "event-datetime":
                    dt,
                    "started-tasks":
                    result
                })

            except ValueError as ex:
                self._logger.error(ERR_HANDLING_EVENT_IN_BASE_HANDLER, ex,
                                   safe_json(config_task, indent=2))

        finally:
            self._logger.flush()
    def _execute_task(self, task, dt=None, task_group=None):
        """
        Execute a task by starting a lambda function that selects the resources for that action
        :param task: Task started
        :param dt: Task start datetime
        :return:
        """

        debug_state = self._logger.debug_enabled
        self._logger.debug_enabled = task.get(handlers.TASK_DEBUG, False)
        if task_group is None:
            task_group = str(uuid.uuid4())
        try:

            event = {
                handlers.HANDLER_EVENT_ACTION:
                handlers.HANDLER_ACTION_SELECT_RESOURCES,
                handlers.HANDLER_EVENT_TASK:
                task,
                handlers.HANDLER_EVENT_SOURCE:
                "scheduler-handler",
                handlers.HANDLER_EVENT_TASK_DT:
                dt.isoformat()
                if dt is not None else datetime.utcnow().isoformat(),
                handlers.HANDLER_EVENT_TASK_GROUP:
                task_group
            }

            sub_tasks = list(
                ScheduleHandler.task_account_region_sub_tasks(task))
            for sub_task in sub_tasks:

                event[handlers.HANDLER_EVENT_SUB_TASK] = sub_task

                if not handlers.running_local(self._context):

                    if task[handlers.
                            TASK_SELECT_SIZE] != actions.ACTION_USE_ECS:
                        # start lambda function to scan for task resources
                        payload = str.encode(safe_json(event))
                        client = get_client_with_retries("lambda", ["invoke"],
                                                         context=self._context)

                        function_name = "{}-{}-{}".format(
                            os.getenv(handlers.ENV_STACK_NAME),
                            os.getenv(handlers.ENV_LAMBDA_NAME),
                            task[handlers.TASK_SELECT_SIZE])

                        self._logger.info(INFO_RUNNING_LAMBDA, function_name)

                        try:
                            resp = client.invoke_with_retries(
                                FunctionName=function_name,
                                InvocationType="Event",
                                LogType="None",
                                Payload=payload)

                            self._logger.debug(DEBUG_LAMBDA,
                                               resp["StatusCode"], payload)
                        except Exception as ex:
                            self._logger.error(ERR_FAILED_START_LAMBDA_TASK,
                                               str(ex))

                    else:
                        ecs_args = {
                            handlers.HANDLER_EVENT_ACTION:
                            handlers.HANDLER_ACTION_SELECT_RESOURCES,
                            handlers.TASK_NAME: task[handlers.TASK_NAME],
                            handlers.HANDLER_EVENT_SUB_TASK: sub_task
                        }

                        ecs_memory = task.get(handlers.TASK_SELECT_ECS_MEMORY,
                                              None)

                        self._logger.info(INFO_RUNNING_AS_ECS_JOB,
                                          task[handlers.TASK_NAME])
                        handlers.run_as_ecs_job(ecs_args,
                                                ecs_memory_size=ecs_memory,
                                                context=self._context,
                                                logger=self._logger)

                else:
                    if task[handlers.
                            TASK_SELECT_SIZE] == actions.ACTION_USE_ECS:
                        ecs_args = {
                            handlers.HANDLER_EVENT_ACTION:
                            handlers.HANDLER_ACTION_SELECT_RESOURCES,
                            handlers.TASK_NAME: task[handlers.TASK_NAME],
                            handlers.HANDLER_EVENT_SUB_TASK: sub_task
                        }

                        ecs_memory = task.get(handlers.TASK_SELECT_ECS_MEMORY,
                                              None)

                        handlers.run_as_ecs_job(ecs_args,
                                                ecs_memory_size=ecs_memory,
                                                logger=self._logger)
                    else:
                        # or if not running in lambda environment pass event to main task handler
                        lambda_handler(event, self._context)

            return task_group, sub_tasks

        finally:
            self._logger.debug_enabled = debug_state
    def handle_scheduler_tasks(self, task_config):

        started_tasks = {}
        start = datetime.now()

        last_run_dt = self._get_last_run()

        self._logger.info(INFO_LAST_SAVED, last_run_dt.isoformat())

        if self.configuration_update:
            self._logger.info(INFO_CONFIG_RUN, self.updated_task)

        current_dt = self._set_last_run()
        already_ran_this_minute = last_run_dt == current_dt
        if already_ran_this_minute and not (self.configuration_update
                                            or self.execute_task_request):
            self._logger.info(INFO_TASK_SCHEDULER_ALREADY_RAN)

        else:

            self._logger.info(INFO_CURRENT_SCHEDULING_DT, current_dt)

            task = None
            enabled_tasks = 0

            next_executed_task = None
            utc = pytz.timezone("UTC")

            tasks = [
                t for t in task_config.get_tasks()
                if t.get(handlers.TASK_INTERVAL) is not None
                and t.get(handlers.TASK_ENABLED, True)
            ]

            try:
                for task in tasks:

                    enabled_tasks += 1

                    self._logger.debug_enabled = task[handlers.TASK_DEBUG]

                    task_name = task[handlers.TASK_NAME]

                    # timezone for specific task
                    task_timezone = pytz.timezone(task[handlers.TASK_TIMEZONE])

                    # create cron expression to test if task needs te be executed
                    task_cron_expression = CronExpression(
                        expression=task[handlers.TASK_INTERVAL])

                    localized_last_run = last_run_dt.astimezone(task_timezone)
                    localized_current_dt = current_dt.astimezone(task_timezone)

                    next_execution = task_cron_expression.first_within_next(
                        timedelta(hours=24), localized_current_dt)
                    next_execution_utc = next_execution.astimezone(
                        utc).replace(microsecond=0
                                     ) if next_execution is not None else None

                    if next_execution_utc is not None:
                        if next_executed_task is None or next_execution_utc < next_executed_task[
                                0]:
                            next_executed_task = (next_execution_utc, task)

                    if already_ran_this_minute:
                        continue

                    # test if task needs te be executed since last run of ops automator
                    execute_dt_since_last = task_cron_expression.last_since(
                        localized_last_run, localized_current_dt)

                    if execute_dt_since_last is None:
                        if next_execution is not None:
                            next_execution = next_execution.astimezone(
                                task_timezone)
                            self._logger.info(INFO_NEXT_EXECUTION, task_name,
                                              next_execution.isoformat(),
                                              task_timezone)
                        else:
                            self._logger.info(INFO_NO_NEXT_WITHIN, task_name)
                        continue

                    self._logger.info(INFO_SCHEDULED_TASK, task_name,
                                      execute_dt_since_last, task_timezone,
                                      str(safe_json(task, indent=2)))

                    # create an event for lambda function that starts execution by selecting for resources for this task
                    task_group, sub_tasks = self._execute_task(
                        task, execute_dt_since_last)

                    started_tasks[task_name] = {
                        "task-group": task_group,
                        "sub-tasks": sub_tasks
                    }

                if started_tasks:
                    self._logger.info(INFO_STARTED_TASKS, enabled_tasks,
                                      ",".join(started_tasks))
                else:
                    self._logger.info(INFO_NO_TASKS_STARTED, enabled_tasks)

                self._set_next_schedule_event(current_dt, next_executed_task)

                running_time = float((datetime.now() - start).total_seconds())

                return safe_dict({
                    "datetime": datetime.now().isoformat(),
                    "running-time": running_time,
                    "event-datetime": current_dt.isoformat(),
                    "enabled_tasks": enabled_tasks,
                    "started-tasks": started_tasks
                })

            except ValueError as ex:
                self._logger.error(ERR_SCHEDULE_HANDLER, ex,
                                   safe_json(task, indent=2))
示例#13
0
def ecs_handler(args):
    dt = datetime.utcnow()
    log_stream = LOG_STREAM.format("OpsAutomatorMainEcs", dt.year, dt.month,
                                   dt.day)

    with outputs.queued_logger.QueuedLogger(logstream=log_stream,
                                            context=None,
                                            buffersize=20) as logger:

        action_step = args.get(handlers.HANDLER_EVENT_ACTION, None)
        if action_step is None:
            logger.error(ERR_ECS_NO_PARAM, handlers.HANDLER_EVENT_ACTION,
                         safe_json(args, indent=3))
            return

        event = {}
        task_item = {}

        if action_step in [
                handlers.HANDLER_ACTION_EXECUTE,
                handlers.HANDLER_ACTION_TEST_COMPLETION
        ]:
            task_id = args.get(handlers.TASK_TR_ID, None)
            if task_id is None:
                logger.error(ERR_ECS_NO_TASK_ID, handlers.TASK_TR_ID,
                             safe_json(args, indent=3))
                return

            expected_status = handlers.STATUS_PENDING \
                if action_step == handlers.HANDLER_ACTION_EXECUTE \
                else handlers.STATUS_WAIT_FOR_COMPLETION
            task_item = handlers.task_tracking_table.TaskTrackingTable(
                logger=logger,
                context=EcsTaskContext(timeout_seconds=300)).get_task_item(
                    task_id, status=expected_status)

            if task_item is None:
                logger.error(ECS_TASK_NOT_FOUND_FOR_STEP, task_id,
                             expected_status, action_step)
                return

            event = {i: task_item.get(i) for i in task_item}
            event[handlers.HANDLER_EVENT_ACTION] = action_step

        elif action_step in [handlers.HANDLER_ACTION_SELECT_RESOURCES]:
            task_name = actions.ACTION_ID = args.get(handlers.TASK_NAME, None)
            if task_name is None:
                logger.error(ERR_ECS_NO_TASK_NAME,
                             configuration.CONFIG_TASK_NAME,
                             safe_json(args, indent=3))
                return

            task_item = configuration.task_configuration.TaskConfiguration(
                logger=logger, context=None).get_task(task_name)
            if task_item is None:
                logger.error(ECS_TASK_DOES_NOT_EXIST, task_name)
                return

            event = {
                handlers.HANDLER_EVENT_ACTION:
                handlers.HANDLER_ACTION_SELECT_RESOURCES,
                handlers.HANDLER_EVENT_TASK: task_item,
                handlers.HANDLER_EVENT_SOURCE: "ecs_handler",
                handlers.HANDLER_EVENT_TASK_DT: datetime.now().isoformat()
            }

        return lambda_handler(
            event=event,
            context=EcsTaskContext(
                timeout_seconds=task_item.get(handlers.TASK_TIMEOUT, 3600)))
    def handle_request(self):
        """
        Handles the event
        :return: result of handling the event, result send back to REST admin api
        """
        def snake_to_pascal_case(s):
            converted = ""
            s = s.strip("_").capitalize()
            i = 0

            while i < len(s):
                if s[i] == "-":
                    pass
                elif s[i] == "_":
                    i += 1
                    converted += s[i].upper()
                else:
                    converted += s[i]
                i += 1

            return converted

        def dict_to_pascal_case(d):

            ps = {}

            if isinstance(d, dict):
                for i in d:
                    key = snake_to_pascal_case(i)
                    ps[key] = dict_to_pascal_case(d[i])
                return ps

            elif isinstance(d, list):
                return [dict_to_pascal_case(l) for l in d]

            return d

        try:
            self._logger.info("Handler {} : Received CLI request \n{}",
                              self.__class__.__name__,
                              safe_json(self._event, indent=3))

            # get access to admin api module
            admin_module = configuration.task_admin_api

            # get api action and map it to a function in the admin API
            fn_name = self.commands.get(self.action, None)
            if fn_name is None:
                raise ValueError("Command {} does not exist".format(
                    self.action))
            fn = getattr(admin_module, fn_name)

            # calling the mapped admin api method
            self._logger.info("Calling \"{}\" with parameters \n{}",
                              fn.__name__, safe_json(self.parameters,
                                                     indent=3))

            args = self.parameters
            args["context"] = self._context
            api_result = fn(**args)

            # convert to awscli PascalCase output format
            result = dict_to_pascal_case(api_result)

            # perform output transformation
            if fn_name in self.result_transformations:
                result = jmespath.search(self.result_transformations[fn_name],
                                         result)

            for t in self.attribute_transformations:
                if t in result:
                    if self.attribute_transformations[t] is not None:
                        result[self.attribute_transformations[t]] = result[t]
                        del result[t]

            # log formatted result
            json_result = safe_json(result, 3)
            self._logger.info("Call result is {}", json_result)

            return result

        except Exception as ex:
            self._logger.info("Call failed, error is {}", str(ex))
            return {"Error": str(ex)}
        finally:
            self._logger.flush()
示例#15
0
    def run(self,
            parameters,
            complete_check_polling_interval=60,
            task_timeout=None,
            task_name=None,
            datetime_delta=None,
            events=None,
            tag_filter=None,
            run_in_regions=None,
            action_select_params=None,
            debug=False,
            run_after_select=None):

        self.results = []
        self.executed_tasks = []
        self.parameters = parameters
        self.action_select_parameters = action_select_params if action_select_params is not None else {}
        self.task_name = task_name if task_name is not None else "{}-test".format(
            self.action_name).lower()
        self._ensure_action_stack()

        self.run_after_select = run_after_select

        self.context = Context()

        self._events = events if events is not None else {}
        self._tag_filter = tag_filter

        save_debug = self.debug
        self.debug = debug
        self.logger._debug = self.debug

        self.interval = parameters.get(actions.ACTION_PARAM_INTERVAL, None)

        self.run_in_regions = run_in_regions if run_in_regions is not None else [
            self.test_region
        ]

        try:
            if datetime_delta is not None:
                set_datetime_delta(datetime_delta)
                actions.set_date_time_provider(DatetimeProvider)
                self.logger.test(
                    "Setting simulated test execution date and time to {}",
                    actions.date_time_provider().now())

            for executed_task in self._get_tasks_to_execute():
                try:
                    self.executed_tasks.append(executed_task)
                    self.logger.test(
                        "Start execution of action {} using assumed role {} in region {}",
                        self.action_name, self._assumed_role,
                        self.tested_region)
                    start_result = executed_task.execute()
                    if not executed_task.get(
                            actions.ACTION_PARAM_HAS_COMPLETION, False):
                        self.logger.test("Action completed with result {}",
                                         safe_json(start_result, indent=3))
                        setattr(executed_task, handlers.TASK_TR_RESULT,
                                start_result)
                        setattr(executed_task, handlers.TASK_TR_STATUS,
                                handlers.STATUS_COMPLETED)
                    else:
                        self.logger.test("Waiting for task to complete")
                        setattr(executed_task, handlers.TASK_TR_START_RESULT,
                                start_result)

                        # noinspection PyProtectedMember
                        timeout = executed_task._timeout_
                        if timeout is None:
                            timeout = task_timeout * 60 if task_timeout is not None else 60
                        timeout *= 60
                        with Timer(timeout_seconds=timeout) as timer:
                            while True:
                                is_completed = getattr(
                                    executed_task, handlers.COMPLETION_METHOD,
                                    None)
                                if is_completed is None:
                                    raise Exception(
                                        "Tested action needs completion but does not implement the required {} method",
                                        handlers.COMPLETION_METHOD)
                                complete_result = executed_task.is_completed(
                                    start_result)
                                if complete_result is not None:
                                    self.logger.test(
                                        "Action completed with result {}",
                                        safe_json(complete_result, indent=3))
                                    setattr(executed_task,
                                            handlers.TASK_TR_STATUS,
                                            handlers.STATUS_COMPLETED)
                                    setattr(executed_task,
                                            handlers.TASK_TR_RESULT,
                                            complete_result)
                                    break
                                if timer.timeout:
                                    self.logger.test("Action timed out")
                                    setattr(executed_task,
                                            handlers.TASK_TR_STATUS,
                                            handlers.STATUS_TIMED_OUT)
                                    setattr(executed_task,
                                            handlers.TASK_TR_ERROR, "Timeout")
                                    break
                                self.logger.test(
                                    "Action not completed yet, waiting {} seconds",
                                    complete_check_polling_interval)
                                time.sleep(complete_check_polling_interval)
                    self.results.append(executed_task)

                except Exception as ex:
                    self.logger.test("Action failed {}", str(ex))
                    setattr(executed_task, handlers.TASK_TR_STATUS,
                            handlers.STATUS_FAILED)
                    setattr(executed_task, handlers.TASK_TR_ERROR, str(ex))
                    self.results.append(executed_task)

        finally:
            if datetime_delta is not None:
                actions.reset_date_provider()
            self.debug = save_debug
            self.logger._debug = self.debug

        return self.results