Example #1
    def process_asg(self, asg):
        """Multistep process to stop an asg aprori of setup

        - suspend processes
        - stop instances
        """
        session = local_session(self.manager.session_factory)
        asg_client = session.client('autoscaling')
        try:
            self.manager.retry(
                asg_client.suspend_processes,
                AutoScalingGroupName=asg['AutoScalingGroupName'])
        except ClientError as e:
            if e.response['Error']['Code'] == 'ValidationError':
                return
            raise
        ec2_client = session.client('ec2')
        try:
            instance_ids = [i['InstanceId'] for i in asg['Instances']]
            if not instance_ids:
                return
            retry = get_retry((
                'RequestLimitExceeded', 'Client.RequestLimitExceeded'))
            retry(ec2_client.stop_instances, InstanceIds=instance_ids)
        except ClientError as e:
            if e.response['Error']['Code'] in (
                    'InvalidInstanceID.NotFound',
                    'IncorrectInstanceState'):
                log.warning("Error stopping asg instances %s %s" % (
                    asg['AutoScalingGroupName'], e))
                return
            raise
Example #2
    def process_resource(self, target, key, tags, snapshot):
        p = {}
        if key:
            p['KmsKeyId'] = key
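        # RDS snapshot identifiers may not contain ':' (automated snapshots
        # are prefixed with 'rds:'), so swap it for '-'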
        p['TargetDBSnapshotIdentifier'] = snapshot[
            'DBSnapshotIdentifier'].replace(':', '-')
        p['SourceRegion'] = self.manager.config.region
        p['SourceDBSnapshotIdentifier'] = snapshot['DBSnapshotArn']

        if self.data.get('copy_tags', True):
            p['CopyTags'] = True
        if tags:
            p['Tags'] = tags

        retry = get_retry(
            ('SnapshotQuotaExceeded',),
            # TODO make this configurable, class defaults to 1hr
            min_delay=self.min_delay,
            max_attempts=self.max_attempts,
            log_retries=logging.DEBUG)
        try:
            result = retry(target.copy_db_snapshot, **p)
        except ClientError as e:
            if e.response['Error']['Code'] == 'DBSnapshotAlreadyExists':
                self.log.warning(
                    "Snapshot %s already exists in target region",
                    snapshot['DBSnapshotIdentifier'])
                return
            raise
        snapshot['c7n:CopiedSnapshot'] = result[
            'DBSnapshot']['DBSnapshotArn']
Example #3
    def __new__(cls, name, parents, attrs):
        if 'resource_type' not in attrs:
            return super(QueryMeta, cls).__new__(cls, name, parents, attrs)

        if 'filter_registry' not in attrs:
            attrs['filter_registry'] = FilterRegistry(
                '%s.filters' % name.lower())
        if 'action_registry' not in attrs:
            attrs['action_registry'] = ActionRegistry(
                '%s.actions' % name.lower())

        if attrs['resource_type']:
            m = ResourceQuery.resolve(attrs['resource_type'])
            # Generic cloud watch metrics support
            if m.dimension:
                attrs['filter_registry'].register('metrics', MetricsFilter)
            # EC2 Service boilerplate ...
            if m.service == 'ec2':
                # Generic ec2 retry
                attrs['retry'] = staticmethod(get_retry((
                    'RequestLimitExceeded', 'Client.RequestLimitExceeded')))
                # Generic ec2 resource tag support
                if getattr(m, 'taggable', True):
                    register_ec2_tags(
                        attrs['filter_registry'], attrs['action_registry'])
            if getattr(m, 'universal_taggable', False):
                register_universal_tags(
                    attrs['filter_registry'], attrs['action_registry'])

        return super(QueryMeta, cls).__new__(cls, name, parents, attrs)
Example #4
 def process_stack(self, stack):
     client = local_session(
         self.manager.session_factory).client('opsworks')
     try:
         stack_id = stack['StackId']
         for app in client.describe_apps(StackId=stack_id)['Apps']:
             client.delete_app(AppId=app['AppId'])
         instances = client.describe_instances(StackId=stack_id)['Instances']
         orig_length = len(instances)
         instances = self.filter_instance_state(instances)
         if len(instances) != orig_length:
             self.log.error(
                 "All instances must be stopped before deletion. Stack Id: %s Name: %s." %
                 (stack_id, stack['Name']))
             return
         for instance in instances:
             instance_id = instance['InstanceId']
             # Validation Exception raised for instances that are stopping when delete is called
             retryable = ('ValidationException',)
             retry = utils.get_retry(retryable, max_attempts=8)
             try:
                 retry(client.delete_instance, InstanceId=instance_id)
             except ClientError as e2:
                 if e2.response['Error']['Code'] in retryable:
                     return True
                 raise
         for layer in client.describe_layers(StackId=stack_id)['Layers']:
             client.delete_layer(LayerId=layer['LayerId'])
         client.delete_stack(StackId=stack_id)
     except ClientError as e:
         self.log.exception(
             "Exception deleting stack:\n %s" % e)
Example #5
 def __init__(self, log_group=__name__, log_stream=None,
              session_factory=None):
     super(CloudWatchLogHandler, self).__init__()
     self.log_group = log_group
     self.log_stream = log_stream
     self.session_factory = session_factory or boto3.Session
     self.transport = None
     self.queue = Queue.Queue()
     self.threads = []
     # do some basic buffering before sending to transport to minimize
     # queue/threading overhead
     self.buf = []
     self.last_seen = time.time()
     # The logging module internally tracks all handlers for final cleanup
     # at exit; custodian scopes shutdown more explicitly to each policy,
     # so use a sentinel value to avoid deadlocks.
     self.shutdown = False
     retry = get_retry(('ThrottlingException',))
     try:
         client = self.session_factory().client('logs')
         logs = retry(
             client.describe_log_groups,
             logGroupNamePrefix=self.log_group)['logGroups']
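          # describe_log_groups matches by prefix, so check for an exact
          # name match before creating the group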
         if not [l for l in logs if l['logGroupName'] == self.log_group]:
             retry(client.create_log_group,
                   logGroupName=self.log_group)
     except ClientError as e:
         if Error.code(e) != Error.ResourceExists:
             raise
Example #6
    def test_retry_passthrough(self):

        def func():
            return 42

        retry = utils.get_retry((), 5)
        self.assertEqual(retry(func), 42)
Example #7
def assumed_session(role_arn, session_name, session=None, region=None, external_id=None):
    """STS Role assume a boto3.Session

    With automatic credential renewal.

    Args:
      role_arn: iam role arn to assume
      session_name: client session identifier
      session: an optional extant session, note session is captured
      in a function closure for renewing the sts assumed role.

    :return: a boto3 session using the sts assumed role credentials

    Notes: We have to poke at botocore internals a few times
    """
    if session is None:
        session = Session()

    retry = get_retry(('Throttling',))

    def refresh():

        parameters = {"RoleArn": role_arn, "RoleSessionName": session_name}

        if external_id is not None:
            parameters['ExternalId'] = external_id

        credentials = retry(
            session.client('sts').assume_role, **parameters)['Credentials']
        return dict(
            access_key=credentials['AccessKeyId'],
            secret_key=credentials['SecretAccessKey'],
            token=credentials['SessionToken'],
            # Silly that we basically stringify so it can be parsed again
            expiry_time=credentials['Expiration'].isoformat())

    session_credentials = RefreshableCredentials.create_from_metadata(
        metadata=refresh(),
        refresh_using=refresh,
        method='sts-assume-role')

    # So dirty it hurts: there is no clean way to set this outside of
    # poking at the internals. There's some work upstream on making this
    # nicer, but it's fairly baroque as well.
    # https://github.com/boto/boto3/issues/443
    # https://github.com/boto/botocore/issues/761

    s = get_session()
    s._credentials = session_credentials
    if region is None:
        region = s.get_config_variable('region') or 'us-east-1'
    s.set_config_variable('region', region)
    return Session(botocore_session=s)
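A minimal usage sketch (the role ARN, session name, and region below are
hypothetical), assuming the assumed_session helper above:

    session = assumed_session(
        'arn:aws:iam::123456789012:role/example-audit',
        'example-audit-session', region='us-east-1')
    ec2 = session.client('ec2')
    regions = ec2.describe_regions()['Regions']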
Example #8
 def process_instance_set(self, client, instances):
     # Setup retry with insufficient capacity as well
     retryable = ('InsufficientInstanceCapacity', 'RequestLimitExceeded',
                  'Client.RequestLimitExceeded')
     retry = utils.get_retry(retryable, max_attempts=5)
     instance_ids = [i['InstanceId'] for i in instances]
     try:
         retry(client.reboot_instances, InstanceIds=instance_ids)
     except ClientError as e:
         if e.response['Error']['Code'] in retryable:
             return True
         raise
Example #9
    def resume_asg_instances(self, asg):
        """Resume asg instances.
        """
        session = local_session(self.manager.session_factory)
        ec2_client = session.client('ec2')
        instance_ids = [i['InstanceId'] for i in asg['Instances']]
        if not instance_ids:
            return

        retry = get_retry((
            'RequestLimitExceeded', 'Client.RequestLimitExceeded'))
        retry(ec2_client.start_instances, InstanceIds=instance_ids)
Example #10
    def process(self, resources):
        client = local_session(self.manager.session_factory).client('ecs')
        retry = get_retry(('Throttling',))

        for r in resources:
            try:
                retry(client.deregister_task_definition,
                      taskDefinition=r['taskDefinitionArn'])
            except ClientError as e:
                # No error code for not found.
                if e.response['Error'][
                        'Message'] != 'The specified task definition does not exist.':
                    raise
Example #11
    def process_snapshot_set(self, client, snapshots_set):
        retry = get_retry((
            'RequestLimitExceeded', 'Client.RequestLimitExceeded'))

        for s in snapshots_set:
            if s['SnapshotId'] in self.image_snapshots:
                continue
            try:
                retry(client.delete_snapshot,
                      SnapshotId=s['SnapshotId'],
                      DryRun=self.manager.config.dryrun)
            except ClientError as e:
                if e.response['Error']['Code'] == "InvalidSnapshot.NotFound":
                    continue
                raise
Example #12
    def process(self, resources):
        client = local_session(self.manager.session_factory).client('ecs')
        retry = get_retry(('Throttling',))
        reason = self.data.get('reason', 'custodian policy')

        for r in resources:
            try:
                retry(client.stop_task,
                      cluster=r['clusterArn'],
                      task=r['taskArn'],
                      reason=reason)
            except ClientError as e:
                # No error code for not found.
                if e.response['Error']['Message'] != "The referenced task was not found.":
                    raise
Example #13
    def test_retry_errors(self):
        self.patch(time, 'sleep', lambda x: x)
        self.count = 0

        def func():
            self.count += 1
            raise ClientError({'Error': {'Code': 42}}, 'something')

        retry = utils.get_retry((42,), 5)

        try:
            retry(func)
        except ClientError:
            self.assertEqual(self.count, 5)
        else:
            self.fail("should have raised")
Example #14
    def test_retry_errors(self):
        self.patch(time, "sleep", lambda x: x)
        self.count = 0

        def func():
            self.count += 1
            raise ClientError({"Error": {"Code": 42}}, "something")

        retry = utils.get_retry((42,), 5)

        try:
            retry(func)
        except ClientError:
            self.assertEqual(self.count, 5)
        else:
            self.fail("should have raised")
Example #15
 def process(self, resources):
     client = local_session(self.manager.session_factory).client('ecs')
     retry = get_retry(('Throttling',))
     for r in resources:
         try:
             primary = [d for d in r['deployments']
                        if d['status'] == 'PRIMARY'].pop()
             if primary['desiredCount'] > 0:
                 retry(client.update_service,
                       cluster=r['clusterArn'],
                       service=r['serviceName'],
                       desiredCount=0)
             retry(client.delete_service,
                   cluster=r['clusterArn'], service=r['serviceName'])
         except ClientError as e:
             if e.response['Error']['Code'] != 'ServiceNotFoundException':
                 raise
Example #16
 def process_instance_set(self, client, instances, itype, izone):
     # Setup retry with insufficient capacity as well
     retry = utils.get_retry((
         'InsufficientInstanceCapacity',
         'RequestLimitExceeded', 'Client.RequestLimitExceeded'),
         max_attempts=5)
     instance_ids = [i['InstanceId'] for i in instances]
     try:
         retry(client.start_instances, InstanceIds=instance_ids)
     except ClientError as e:
         if e.response['Error']['Code'] == 'InsufficientInstanceCapacity':
             self.log.exception(
                 ("Could not start instances:%d type:%s"
                  " zone:%s instances:%s error:%s"),
                 len(instances), itype, izone,
                 ", ".join(instance_ids), e)
             return
         self.log.exception("Error while starting instances error %s", e)
         raise
Example #17
def filter_last_write(client, groups, start):
    """Filter log groups where the last write was before the start date.
    """
    retry = get_retry(('ThrottlingException',))

    def process_group(group_set):
        matched = []
        for g in group_set:
            streams = retry(
                client.describe_log_streams,
                logGroupName=g['logGroupName'],
                orderBy='LastEventTime',
                limit=1, descending=True)
            if not streams.get('logStreams'):
                continue
            stream = streams['logStreams'][0]
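            # treat a group as active if its newest stream was created after
            # the start date (even if still empty) or has ingested events
            # after the start date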
            if stream['storedBytes'] == 0 and datetime.fromtimestamp(
                    stream['creationTime'] / 1000) > start:
                matched.append(g)
            elif 'lastIngestionTime' in stream and datetime.fromtimestamp(
                    stream['lastIngestionTime'] / 1000) > start:
                matched.append(g)
        return matched

    results = []

    with ThreadPoolExecutor(max_workers=3) as w:
        futures = {}
        for group_set in chunks(groups, 10):
            futures[w.submit(process_group, group_set)] = group_set

        for f in as_completed(futures):
            group_set = futures[f]
            if f.exception():
                log.error(
                    "Error processing groupset:%s error:%s",
                    group_set,
                    f.exception())
                continue
            results.extend(f.result())

    return results
Example #18
    def get_related_ids(self, resources):

        if self.efs_group_cache:
            group_ids = set()
            for r in resources:
                group_ids.update(
                    self.efs_group_cache.get(r['MountTargetId'], ()))
            return list(group_ids)

        client = local_session(self.manager.session_factory).client('efs')
        groups = {}
        group_ids = set()
        retry = get_retry(('Throttled',), 12)

        for r in resources:
            groups[r['MountTargetId']] = retry(
                client.describe_mount_target_security_groups,
                MountTargetId=r['MountTargetId'])['SecurityGroups']
            group_ids.update(groups[r['MountTargetId']])

        self.efs_group_cache = groups
        return list(group_ids)
Example #19
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.tz import tzutc

import itertools

from c7n.actions import BaseAction as Action, AutoTagUser
from c7n.filters import Filter, OPERATORS, FilterValidationError
from c7n import utils

DEFAULT_TAG = "maid_status"

universal_tag_retry = utils.get_retry((
    'Throttled',
    'RequestLimitExceeded',
    'Client.RequestLimitExceeded'
))


def register_ec2_tags(filters, actions):
    filters.register('marked-for-op', TagActionFilter)
    filters.register('tag-count', TagCountFilter)

    actions.register('auto-tag-user', AutoTagUser)
    actions.register('mark-for-op', TagDelayedAction)
    actions.register('tag-trim', TagTrim)

    actions.register('mark', Tag)
    actions.register('tag', Tag)
Example #20
        }, client.meta.service_model.operation_model('ListProtections'))


def get_type_protections(client, model):
    pager = get_protections_paginator(client)
    pager.PAGE_ITERATOR_CLS = RetryPageIterator
    try:
        protections = pager.paginate().build_full_result().get(
            'Protections', [])
    except client.exceptions.ResourceNotFoundException:
        # shield is not enabled in the account, so no resources are protected
        return []
    return [p for p in protections if model.arn_type in p['ResourceArn']]


ShieldRetry = get_retry(('ThrottlingException', ))


class IsShieldProtected(Filter):

    permissions = ('shield:ListProtections', )
    schema = type_schema('shield-enabled', state={'type': 'boolean'})

    def process(self, resources, event=None):
        client = local_session(self.manager.session_factory).client(
            'shield', region_name='us-east-1')

        protections = get_type_protections(client, self.manager.get_model())
        protected_resources = {p['ResourceArn'] for p in protections}

        state = self.data.get('state', False)
Example #21
# limitations under the License.
"""
S3 Key Encrypt on Bucket Changes
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import json
import urllib
import boto3
from botocore.exceptions import ClientError

from c7n.resources.s3 import EncryptExtantKeys
from c7n.utils import get_retry

s3 = config = None
retry = get_retry(['404', '503'], max_attempts=4, min_delay=2)


def init():
    global s3, config
    if s3 is not None:
        return

    s3 = boto3.client('s3')
    with open('config.json') as fh:
        config = json.load(fh)
        # a multipart copy of a multi-GB file can take a long time
        config['large'] = False


def process_key_event(event, context):
Example #22
def export(group, bucket, prefix, start, end, role, session=None):
    start = start and isinstance(start, basestring) and parse(start) or start
    end = (end and isinstance(end, basestring) and
           parse(end) or end or datetime.now())
    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')
    retry = get_retry(('LimitExceededException',), min_delay=4)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'),
                            group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName']

    log.debug("Log exporting group:%s start:%s end:%s bucket:%s prefix:%s",
              group,
              start.strftime('%Y/%m/%d'),
              end.strftime('%Y/%m/%d'),
              bucket,
              prefix)

    t = time.time()
    days = [start + timedelta(i) for i in range((end - start).days)]
    day_count = len(days)
    days = filter_extant_exports(
        boto3.Session().client('s3'), bucket, prefix, days, start, end)

    log.debug("Filtering s3 extant keys from %d to %d in %0.2f",
              day_count, len(days), time.time() - t)
    t = time.time()

    for idx, d in enumerate(days):
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        result = retry(client.create_export_task, **params)
        log.debug("Log export group:%s day:%s bucket:%s prefix:%s task:%s",
                  group,
                  params['taskName'],
                  bucket,
                  params['destinationPrefix'],
                  result['taskId'])

    log.info(("Exported log group:%s time:%0.2f days:%d start:%s"
              " end:%s bucket:%s prefix:%s"),
             group,
             time.time() - t,
             len(days),
             start.strftime('%Y/%m/%d'),
             end.strftime('%Y/%m/%d'),
             bucket,
             prefix)
Example #23
                    {"required": ['role']}
                ],
                'required': ['name', 'bucket', 'regions', 'title', 'id'],
                'properties': {
                    'name': {'type': 'string'},
                    'title': {'type': 'string'},
                    'tags': {'type': 'object'},
                    'bucket': {'type': 'string'},
                    'regions': {'type': 'array', 'items': {'type': 'string'}}
                }
            }
        }
    }
}

retry = get_retry(('Throttling',), log_retries=True)


indexers = PluginRegistry('policy-metrics-indexers')


class Indexer(object):
    """ Metrics indexer
    """


def get_indexer(config, **kwargs):
    itype = config['indexer']['type']
    klass = indexers.get(itype)
    return klass(config, **kwargs)
Example #24
 def process(self, volumes):
     client = local_session(self.manager.session_factory).client('ec2')
     retry = get_retry(['Throttled'], max_attempts=5)
     for vol in volumes:
         vol_id = vol['VolumeId']
         retry(client.create_snapshot, VolumeId=vol_id)
Example #25
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
S3 Key Encrypt on Bucket Changes
"""
import json

import boto3
from botocore.exceptions import ClientError

from c7n.resources.s3 import EncryptExtantKeys
from c7n.utils import get_retry

s3 = config = None
retry = get_retry(['404', '503'])


def init():
    global s3, config
    if s3 is not None:
        return

    s3 = boto3.client('s3')
    with open('config.json') as fh:
        config = json.load(fh)
        # a multipart copy of a multi-GB file can take a long time
        config['large'] = False


def process_key_event(event, context):
Example #26
 def process(self, resources):
     client = local_session(self.manager.session_factory).client('efs')
     self.unmount_filesystems(resources)
     retry = get_retry(('FileSystemInUse',), 12)
     for r in resources:
         retry(client.delete_file_system, FileSystemId=r['FileSystemId'])
Example #27
def export(group, bucket, prefix, start, end, role, poll_period=120, session=None, name=""):
    """export a given log group to s3"""
    start = start and isinstance(start, six.string_types) and parse(start) or start
    end = (end and isinstance(end, six.string_types) and
           parse(end) or end or datetime.now())
    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')

    paginator = client.get_paginator('describe_log_groups')
    for p in paginator.paginate():
        found = False
        for _group in p['logGroups']:
            if _group['logGroupName'] == group:
                group = _group
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError("Log group %s not found." % group)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'), group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName']

    named_group = "%s:%s" % (name, group['logGroupName'])
    log.info(
        "Log exporting group:%s start:%s end:%s bucket:%s prefix:%s size:%s",
        named_group,
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix,
        group['storedBytes'])

    t = time.time()
    days = [(
        start + timedelta(i)).replace(minute=0, hour=0, second=0, microsecond=0)
        for i in range((end - start).days)]
    day_count = len(days)
    s3 = boto3.Session().client('s3')
    days = filter_extant_exports(s3, bucket, prefix, days, start, end)

    log.info("Group:%s filtering s3 extant keys from %d to %d start:%s end:%s",
             named_group, day_count, len(days),
             days[0] if days else '', days[-1] if days else '')
    t = time.time()

    retry = get_retry(('SlowDown',))

    for idx, d in enumerate(days):
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix
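        # ensure a placeholder object exists at the prefix so export progress
        # can be recorded below via the LastExport object tag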
        try:
            s3.head_object(Bucket=bucket, Key=prefix)
        except ClientError as e:
            if e.response['Error']['Code'] != '404':  # Not Found
                raise
            s3.put_object(
                Bucket=bucket,
                Key=prefix,
                Body=json.dumps({}),
                ACL="bucket-owner-full-control",
                ServerSideEncryption="AES256")

        t = time.time()
        counter = 0
        while True:
            counter += 1
            try:
                result = client.create_export_task(**params)
            except ClientError as e:
                if e.response['Error']['Code'] == 'LimitExceededException':
                    time.sleep(poll_period)
                    # log progress periodically while waiting for an export slot
                    if counter % 6 == 0:
                        log.debug(
                            "group:%s day:%s waiting for %0.2f minutes",
                            named_group, d.strftime('%Y-%m-%d'),
                            (counter * poll_period) / 60.0)
                    continue
                raise
            retry(
                s3.put_object_tagging,
                Bucket=bucket, Key=prefix,
                Tagging={
                    'TagSet': [{
                        'Key': 'LastExport',
                        'Value': d.isoformat()}]})
            break

        log.info(
            "Log export time:%0.2f group:%s day:%s bucket:%s prefix:%s task:%s",
            time.time() - t,
            named_group,
            d.strftime("%Y-%m-%d"),
            bucket,
            params['destinationPrefix'],
            result['taskId'])

    log.info(
        ("Exported log group:%s time:%0.2f days:%d start:%s"
         " end:%s bucket:%s prefix:%s"),
        named_group,
        time.time() - t,
        len(days),
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)
Example #28
from concurrent.futures import as_completed

from datetime import datetime, timedelta
from dateutil import zoneinfo
from dateutil.parser import parse

import itertools

from c7n.actions import BaseAction as Action, AutoTagUser
from c7n.filters import Filter, OPERATORS, FilterValidationError
from c7n.filters.offhours import Time
from c7n import utils

DEFAULT_TAG = "maid_status"

universal_tag_retry = utils.get_retry(
    ('Throttled', 'RequestLimitExceeded', 'Client.RequestLimitExceeded'))


def register_ec2_tags(filters, actions):
    filters.register('marked-for-op', TagActionFilter)
    filters.register('tag-count', TagCountFilter)

    actions.register('auto-tag-user', AutoTagUser)
    actions.register('mark-for-op', TagDelayedAction)
    actions.register('tag-trim', TagTrim)

    actions.register('mark', Tag)
    actions.register('tag', Tag)

    actions.register('unmark', RemoveTag)
    actions.register('untag', RemoveTag)
Example #29
class ConfigSource:

    retry = staticmethod(get_retry(('ThrottlingException', )))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        return [
            "config:GetResourceConfigHistory", "config:ListDiscoveredResources"
        ]

    def get_resources(self, ids, cache=True):
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(client.get_resource_config_history,
                                   resourceId=i,
                                   resourceType=m.config_type,
                                   limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        return list(filter(None, results))

    def get_query_params(self, query):
        """Parse config select expression from policy and parameter.

        On policy config supports a full statement being given, or
        a clause that will be added to the where expression.

        If no query is specified, a default query is utilized.

        A valid query should at minimum select fields
        for configuration, supplementaryConfiguration and
        must have resourceType qualifier.
        """
        if query and not isinstance(query, dict):
            raise PolicyExecutionError("invalid config source query %s" %
                                       (query, ))

        if query is None and 'query' in self.manager.data:
            _q = [q for q in self.manager.data['query'] if 'expr' in q]
            if _q:
                query = _q.pop()

        if query is None and 'query' in self.manager.data:
            _c = [
                q['clause'] for q in self.manager.data['query']
                if 'clause' in q
            ]
            if _c:
                _c = _c.pop()
        elif query:
            return query
        else:
            _c = None

        s = "select configuration, supplementaryConfiguration where resourceType = '{}'".format(
            self.manager.resource_type.config_type)

        if _c:
            s += "AND {}".format(_c)

        return {'expr': s}

    def load_resource(self, item):
        if isinstance(item['configuration'], str):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        resource = camelResource(item_config, implicitDate=True)
        # normalized tag loading across the many variants of config's inconsistencies.
        if ((item.get('tags')
             or item['supplementaryConfiguration'].get('Tags'))
                and 'Tags' not in resource):
            if item.get('tags'):
                resource['Tags'] = [{
                    u'Key': k,
                    u'Value': v
                } for k, v in item['tags'].items()]
            else:
                # config has a bit more variation on tags (serialized json, list, dict, etc)
                stags = item['supplementaryConfiguration']['Tags']
                if isinstance(stags, str):
                    stags = json.loads(stags)
                if isinstance(stags, list):
                    resource['Tags'] = [{
                        u'Key': t['key'],
                        u'Value': t['value']
                    } for t in stags]
                elif isinstance(stags, dict):
                    resource['Tags'] = [{
                        u'Key': k,
                        u'Value': v
                    } for k, v in stags.items()]
        return resource

    def get_listed_resources(self, client):
        # fallback for when config decides to arbitrarily break select
        # resource for a given resource type.
        paginator = client.get_paginator('list_discovered_resources')
        paginator.PAGE_ITERATOR_CLS = RetryPageIterator
        pages = paginator.paginate(
            resourceType=self.manager.get_model().config_type)
        results = []

        with self.manager.executor_factory(max_workers=2) as w:
            ridents = pages.build_full_result()
            resource_ids = [
                r['resourceId'] for r in ridents.get('resourceIdentifiers', ())
            ]
            self.manager.log.debug("querying %d %s resources",
                                   len(resource_ids),
                                   self.manager.__class__.__name__.lower())

            for resource_set in chunks(resource_ids, 50):
                futures = []
                futures.append(w.submit(self.get_resources, resource_set))
                for f in as_completed(futures):
                    if f.exception():
                        self.manager.log.error(
                            "Exception getting resources from config \n %s" %
                            (f.exception()))
                    results.extend(f.result())
        return results

    def resources(self, query=None):
        client = local_session(self.manager.session_factory).client('config')
        query = self.get_query_params(query)
        pager = Paginator(
            client.select_resource_config, {
                'input_token': 'NextToken',
                'output_token': 'NextToken',
                'result_key': 'Results'
            },
            client.meta.service_model.operation_model('SelectResourceConfig'))
        pager.PAGE_ITERATOR_CLS = RetryPageIterator

        results = []
        for page in pager.paginate(Expression=query['expr']):
            results.extend(
                [self.load_resource(json.loads(r)) for r in page['Results']])

        # Config arbitrarily breaks which resource types its supports for query/select
        # on any given day, if we don't have a user defined query, then fallback
        # to iteration mode.
        if not results and query == self.get_query_params({}):
            results = self.get_listed_resources(client)
        return results

    def augment(self, resources):
        return resources
Example #30
def export(group, bucket, prefix, start, end, role, poll_period=120,
           session=None, name="", region=None):
    """export a given log group to s3"""
    start = start and isinstance(start, six.string_types) and parse(start) or start
    end = (end and isinstance(end, six.string_types) and
           parse(end) or end or datetime.now())
    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role, region)

    client = session.client('logs')

    paginator = client.get_paginator('describe_log_groups')
    for p in paginator.paginate():
        found = False
        for _group in p['logGroups']:
            if _group['logGroupName'] == group:
                group = _group
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError("Log group %s not found." % group)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'), group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName']

    named_group = "%s:%s" % (name, group['logGroupName'])
    log.info(
        "Log exporting group:%s start:%s end:%s bucket:%s prefix:%s size:%s",
        named_group,
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix,
        group['storedBytes'])

    t = time.time()
    days = [(
        start + timedelta(i)).replace(minute=0, hour=0, second=0, microsecond=0)
        for i in range((end - start).days)]
    day_count = len(days)
    s3 = boto3.Session().client('s3')
    days = filter_extant_exports(s3, bucket, prefix, days, start, end)

    log.info("Group:%s filtering s3 extant keys from %d to %d start:%s end:%s",
             named_group, day_count, len(days),
             days[0] if days else '', days[-1] if days else '')
    t = time.time()

    retry = get_retry(('SlowDown',))

    for idx, d in enumerate(days):
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix
        try:
            s3.head_object(Bucket=bucket, Key=prefix)
        except ClientError as e:
            if e.response['Error']['Code'] != '404':  # Not Found
                raise
            s3.put_object(
                Bucket=bucket,
                Key=prefix,
                Body=json.dumps({}),
                ACL="bucket-owner-full-control",
                ServerSideEncryption="AES256")

        t = time.time()
        counter = 0
        while True:
            counter += 1
            try:
                result = client.create_export_task(**params)
            except ClientError as e:
                if e.response['Error']['Code'] == 'LimitExceededException':
                    time.sleep(poll_period)
                    # log progress periodically while waiting for an export slot
                    if counter % 6 == 0:
                        log.debug(
                            "group:%s day:%s waiting for %0.2f minutes",
                            named_group, d.strftime('%Y-%m-%d'),
                            (counter * poll_period) / 60.0)
                    continue
                raise
            retry(
                s3.put_object_tagging,
                Bucket=bucket, Key=prefix,
                Tagging={
                    'TagSet': [{
                        'Key': 'LastExport',
                        'Value': d.isoformat()}]})
            break

        log.info(
            "Log export time:%0.2f group:%s day:%s bucket:%s prefix:%s task:%s",
            time.time() - t,
            named_group,
            d.strftime("%Y-%m-%d"),
            bucket,
            params['destinationPrefix'],
            result['taskId'])

    log.info(
        ("Exported log group:%s time:%0.2f days:%d start:%s"
         " end:%s bucket:%s prefix:%s"),
        named_group,
        time.time() - t,
        len(days),
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)
Example #31
# limitations under the License.
"""
S3 Key Encrypt on Bucket Changes
"""
from __future__ import absolute_import, division, print_function, unicode_literals

import json

import boto3
from botocore.exceptions import ClientError

from c7n.resources.s3 import EncryptExtantKeys
from c7n.utils import get_retry

s3 = config = None
retry = get_retry(['404', '503'], max_attempts=4, min_delay=2)


def init():
    global s3, config
    if s3 is not None:
        return

    s3 = boto3.client('s3')
    with open('config.json') as fh:
        config = json.load(fh)
        # a multipart copy of a multi-GB file can take a long time
        config['large'] = False


def process_key_event(event, context):
Example #32
 def process(self, resources):
     client = local_session(self.manager.session_factory).client('efs')
     self.unmount_filesystems(resources)
     retry = get_retry(('FileSystemInUse',), 12)
     for r in resources:
         retry(client.delete_file_system, FileSystemId=r['FileSystemId'])
Example #33
        {'input_token': 'NextToken', 'output_token': 'NextToken', 'result_key': 'Protections'},
        client.meta.service_model.operation_model('ListProtections'))


def get_type_protections(client, model):
    pager = get_protections_paginator(client)
    pager.PAGE_ITERATOR_CLS = RetryPageIterator
    try:
        protections = pager.paginate().build_full_result().get('Protections', [])
    except client.exceptions.ResourceNotFoundException:
        # shield is not enabled in the account, so no resources are protected
        return []
    return [p for p in protections if model.type in p['ResourceArn']]


ShieldRetry = get_retry(('ThrottlingException',))


class IsShieldProtected(Filter):

    permissions = ('shield:ListProtections',)
    schema = type_schema('shield-enabled', state={'type': 'boolean'})

    def process(self, resources, event=None):
        client = local_session(self.manager.session_factory).client(
            'shield', region_name='us-east-1')

        protections = get_type_protections(client, self.manager.get_model())
        protected_resources = {p['ResourceArn'] for p in protections}

        state = self.data.get('state', False)
Example #34
class MetricsOutput(object):
    """Send metrics data to cloudwatch
    """

    permissions = ("cloudWatch:PutMetricData", )

    retry = staticmethod(get_retry(('Throttling', )))

    BUFFER_SIZE = 20

    @staticmethod
    def select(metrics_selector):
        if not metrics_selector:
            return NullMetricsOutput
        # Compatibility for boolean configuration
        if isinstance(metrics_selector, bool):
            metrics_selector = 'aws'
        for k in metrics_outputs.keys():
            if k.startswith(metrics_selector):
                return metrics_outputs[k]
        raise ValueError("invalid metrics option %r" % metrics_selector)

    def __init__(self, ctx, namespace=DEFAULT_NAMESPACE):
        self.ctx = ctx
        self.namespace = namespace
        self.buf = []

    def get_timestamp(self):
        """
        If C7N_METRICS_TZ is set to TRUE, a UTC timestamp will be used.
        For backwards compatibility, UTC is also the default when it is not set.
        To disable this and use the system's time zone, set C7N_METRICS_TZ to FALSE.
        """

        if os.getenv("C7N_METRICS_TZ", 'TRUE').upper() in ('TRUE', ''):
            return datetime.datetime.utcnow()
        else:
            return datetime.datetime.now()

    def flush(self):
        if self.buf:
            self._put_metrics(self.namespace, self.buf)
            self.buf = []

    def put_metric(self, key, value, unit, buffer=True, **dimensions):
        point = self._format_metric(key, value, unit, dimensions)
        self.buf.append(point)
        if buffer:
            # Max metrics in a single request
            if len(self.buf) == 20:
                self.flush()
        else:
            self.flush()

    def _format_metric(self, key, value, unit, dimensions):
        d = {
            "MetricName": key,
            "Timestamp": self.get_timestamp(),
            "Value": value,
            "Unit": unit
        }
        d["Dimensions"] = [{
            "Name": "Policy",
            "Value": self.ctx.policy.name
        }, {
            "Name": "ResType",
            "Value": self.ctx.policy.resource_type
        }]
        for k, v in dimensions.items():
            d['Dimensions'].append({"Name": k, "Value": v})
        return d

    def _put_metrics(self, ns, metrics):
        watch = local_session(self.ctx.session_factory).client('cloudwatch')
        for metric_values in chunks(metrics, self.BUFFER_SIZE):
            self.retry(watch.put_metric_data,
                       Namespace=ns,
                       MetricData=metric_values)
Example #35
    def process_image(self, image):
        retry = get_retry(
            ('RequestLimitExceeded', 'Client.RequestLimitExceeded'))

        client = local_session(self.manager.session_factory).client('ec2')
        retry(client.deregister_image, ImageId=image['ImageId'])
Example #36
    def test_retry_passthrough(self):
        def func():
            return 42

        retry = utils.get_retry((), 5)
        self.assertEqual(retry(func), 42)
Example #37
class MetricsOutput(object):
    """Send metrics data to cloudwatch
    """

    permissions = ("cloudWatch:PutMetricData", )

    retry = staticmethod(get_retry(('Throttling', )))

    @staticmethod
    def select(metrics_enabled):
        if metrics_enabled:
            return MetricsOutput
        return NullMetricsOutput

    def __init__(self, ctx, namespace=DEFAULT_NAMESPACE):
        self.ctx = ctx
        self.namespace = namespace
        self.buf = []

    def get_timestamp(self):
        """
        If C7N_METRICS_TZ is set to TRUE, a UTC timestamp will be used.
        For backwards compatibility, UTC is also the default when it is not set.
        To disable this and use the system's time zone, set C7N_METRICS_TZ to FALSE.
        """

        if os.getenv("C7N_METRICS_TZ", '').upper() in ('TRUE', ''):
            return datetime.datetime.utcnow()
        else:
            return datetime.datetime.now()

    def flush(self):
        if self.buf:
            self._put_metrics(self.namespace, self.buf)
            self.buf = []

    def put_metric(self, key, value, unit, buffer=False, **dimensions):
        d = {
            "MetricName": key,
            "Timestamp": self.get_timestamp(),
            "Value": value,
            "Unit": unit
        }
        d["Dimensions"] = [{
            "Name": "Policy",
            "Value": self.ctx.policy.name
        }, {
            "Name": "ResType",
            "Value": self.ctx.policy.resource_type
        }]
        for k, v in dimensions.items():
            d['Dimensions'].append({"Name": k, "Value": v})

        if buffer:
            self.buf.append(d)
            # Max metrics in a single request
            if len(self.buf) == 20:
                self.flush()
        else:
            self._put_metrics(self.namespace, [d])

    def _put_metrics(self, ns, metrics):
        watch = local_session(self.ctx.session_factory).client('cloudwatch')
        return self.retry(watch.put_metric_data,
                          Namespace=ns,
                          MetricData=metrics)
Example #38
class MismatchS3Origin(Filter):
    """Check for existence of S3 bucket referenced by Cloudfront,
       and verify whether owner is different from Cloudfront account owner.

    :example:

    .. code-block:: yaml

            policies:
              - name: mismatch-s3-origin
                resource: distribution
                filters:
                  - type: mismatch-s3-origin
                    check_custom_origins: true
   """

    s3_prefix = re.compile(r'.*(?=\.s3(-.*)?\.amazonaws.com)')
    s3_suffix = re.compile(r'^([^.]+\.)?s3(-.*)?\.amazonaws.com')

    schema = type_schema('mismatch-s3-origin',
                         check_custom_origins={'type': 'boolean'})

    permissions = ('s3:ListBuckets', )
    retry = staticmethod(get_retry(('Throttling', )))

    def is_s3_domain(self, x):
        bucket_match = self.s3_prefix.match(x['DomainName'])

        if bucket_match:
            return bucket_match.group()

        domain_match = self.s3_suffix.match(x['DomainName'])

        if domain_match:
            value = x['OriginPath']

            if value.startswith('/'):
                value = value.replace("/", "", 1)

            return value

        return None

    def process(self, resources, event=None):
        results = []

        s3_client = local_session(self.manager.session_factory).client(
            's3', region_name=self.manager.config.region)

        buckets = {b['Name'] for b in s3_client.list_buckets()['Buckets']}

        for r in resources:
            r['c7n:mismatched-s3-origin'] = []
            for x in r['Origins']['Items']:
                target_bucket = None
                if 'S3OriginConfig' in x:
                    bucket_match = self.s3_prefix.match(x['DomainName'])
                    if bucket_match:
                        target_bucket = self.s3_prefix.match(
                            x['DomainName']).group()
                elif 'CustomOriginConfig' in x and self.data.get(
                        'check_custom_origins'):
                    target_bucket = self.is_s3_domain(x)

                if target_bucket is not None and target_bucket not in buckets:
                    self.log.debug(
                        "Bucket %s not found in distribution %s hosting account."
                        % (target_bucket, r['Id']))
                    r['c7n:mismatched-s3-origin'].append(target_bucket)
                    results.append(r)

        return results
Example #39
 def process(self, volumes):
     client = local_session(self.manager.session_factory).client('ec2')
     retry = get_retry(['Throttled'], max_attempts=5)
     for vol in volumes:
         vol_id = vol['VolumeId']
         retry(self.process_volume, client=client, volume=vol_id)
Example #40
                    'bucket': {
                        'type': 'string'
                    },
                    'regions': {
                        'type': 'array',
                        'items': {
                            'type': 'string'
                        }
                    }
                }
            }
        }
    }
}

retry = get_retry(('Throttling', ), log_retries=True)

indexers = PluginRegistry('policy-metrics-indexers')


class Indexer(object):
    """ Metrics indexer
    """


def get_indexer(config, **kwargs):
    itype = config['indexer']['type']
    klass = indexers.get(itype)
    return klass(config, **kwargs)

Example #41
 def process(self, volumes):
     client = local_session(self.manager.session_factory).client('ec2')
     retry = get_retry(['Throttled'], max_attempts=5)
     for vol in volumes:
         vol_id = vol['VolumeId']
         retry(self.process_volume, client=client, volume=vol_id)
Example #42
class EMRCluster(QueryResourceManager):
    """Resource manager for Elastic MapReduce clusters
    """
    class resource_type(TypeInfo):
        service = 'emr'
        arn_type = 'emr'
        cluster_states = ['WAITING', 'BOOTSTRAPPING', 'RUNNING', 'STARTING']
        enum_spec = ('list_clusters', 'Clusters', {
            'ClusterStates': cluster_states
        })
        name = 'Name'
        id = 'Id'
        date = "Status.Timeline.CreationDateTime"

    action_registry = actions
    filter_registry = filters
    retry = staticmethod(get_retry(('ThrottlingException', )))

    def __init__(self, ctx, data):
        super(EMRCluster, self).__init__(ctx, data)
        self.queries = QueryFilter.parse(
            self.data.get('query', [{
                'ClusterStates': ['running', 'bootstrapping', 'waiting']
            }]))

    @classmethod
    def get_permissions(cls):
        return ("elasticmapreduce:ListClusters",
                "elasticmapreduce:DescribeCluster")

    def get_resources(self, ids):
        # no filtering by id set supported at the api
        client = local_session(self.session_factory).client('emr')
        results = []
        for jid in ids:
            results.append(client.describe_cluster(ClusterId=jid)['Cluster'])
        return results

    def resources(self, query=None):
        q = self.consolidate_query_filter()
        if q is not None:
            query = query or {}
            for i in range(0, len(q)):
                query[q[i]['Name']] = q[i]['Values']
        return super(EMRCluster, self).resources(query=query)

    def consolidate_query_filter(self):
        result = []
        names = set()
        # allow same name to be specified multiple times and append the queries
        # under the same name
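        # e.g. two query entries for 'ClusterStates' with values ['RUNNING']
        # and ['WAITING'] consolidate into a single filter:
        #   {'Name': 'ClusterStates', 'Values': ['RUNNING', 'WAITING']}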
        for q in self.queries:
            query_filter = q.query()
            if query_filter['Name'] in names:
                for filt in result:
                    if query_filter['Name'] == filt['Name']:
                        filt['Values'].extend(query_filter['Values'])
            else:
                names.add(query_filter['Name'])
                result.append(query_filter)
        if 'ClusterStates' not in names:
            # include default query
            result.append({
                'Name': 'ClusterStates',
                'Values': ['WAITING', 'RUNNING', 'BOOTSTRAPPING'],
            })
        return result

    def augment(self, resources):
        client = local_session(
            self.get_resource_manager('emr').session_factory).client('emr')
        result = []
        # remap for cwmetrics
        for r in resources:
            cluster = self.retry(client.describe_cluster,
                                 ClusterId=r['Id'])['Cluster']
            result.append(cluster)
        return result
Example #43
class SetShieldProtection(BaseAction):
    """Enable shield protection on applicable resource.

    Setting the `sync` parameter will also clear out stale shield protections
    for resources that no longer exist.
    """

    permissions = (
        'shield:CreateProtection',
        'shield:ListProtections',
    )
    schema = type_schema('set-shield',
                         state={'type': 'boolean'},
                         sync={'type': 'boolean'})

    retry = staticmethod(get_retry(('ThrottlingException', )))

    def process(self, resources):
        client = local_session(self.manager.session_factory).client(
            'shield', region_name='us-east-1')
        model = self.manager.get_model()
        protections = get_type_protections(client, self.manager.get_model())
        protected_resources = {p['ResourceArn']: p for p in protections}
        state = self.data.get('state', True)

        if self.data.get('sync', False):
            self.clear_stale(client, protections)

        for r in resources:
            arn = self.manager.get_arn(r)
            if state and arn in protected_resources:
                continue
            if state is False and arn in protected_resources:
                self.retry(client.delete_protection,
                           ProtectionId=protected_resources[arn]['Id'])
                continue
            try:
                self.retry(client.create_protection,
                           Name=r[model.name],
                           ResourceArn=arn)
            except ClientError as e:
                if e.response['Error'][
                        'Code'] == 'ResourceAlreadyExistsException':
                    continue
                raise

    def clear_stale(self, client, protections):
        # Get all resources unfiltered
        resources = self.manager.get_resource_manager(
            self.manager.type).resources()
        resource_arns = set(map(self.manager.get_arn, resources))

        pmap = {}
        # Only process stale resources in region for non global resources.
        global_resource = getattr(self.manager.resource_type,
                                  'global_resource', False)
        for p in protections:
            if not global_resource and self.manager.region not in p[
                    'ResourceArn']:
                continue
            pmap[p['ResourceArn']] = p

        # Find any protections for resources that don't exist
        stale = set(pmap).difference(resource_arns)
        self.log.info("clearing %d stale protections", len(stale))
        for s in stale:
            self.retry(client.delete_protection, ProtectionId=pmap[s]['Id'])
Example #44
class QueryResourceManager(ResourceManager):

    resource_type = ""

    # TODO Check if we can move to describe source
    max_workers = 3
    chunk_size = 20

    permissions = ()

    _generate_arn = None

    retry = staticmethod(
        get_retry(('ThrottlingException', 'RequestLimitExceeded', 'Throttled',
                   'Throttling', 'Client.RequestLimitExceeded')))

    def __init__(self, data, options):
        super(QueryResourceManager, self).__init__(data, options)
        self.source = self.get_source(self.source_type)

    @property
    def source_type(self):
        return self.data.get('source', 'describe')

    def get_source(self, source_type):
        return sources.get(source_type)(self)

    @classmethod
    def get_model(cls):
        return ResourceQuery.resolve(cls.resource_type)

    @classmethod
    def match_ids(cls, ids):
        """return ids that match this resource type's id format."""
        id_prefix = getattr(cls.get_model(), 'id_prefix', None)
        if id_prefix is not None:
            return [i for i in ids if i.startswith(id_prefix)]
        return ids

    def get_permissions(self):
        perms = self.source.get_permissions()
        if getattr(self, 'permissions', None):
            perms.extend(self.permissions)
        return perms

    def get_cache_key(self, query):
        return {
            'account': self.account_id,
            'region': self.config.region,
            'resource': str(self.__class__.__name__),
            'q': query
        }

    def resources(self, query=None):
        key = self.get_cache_key(query)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug(
                    "Using cached %s: %d" %
                    ("%s.%s" %
                     (self.__class__.__module__, self.__class__.__name__),
                     len(resources)))
                return self.filter_resources(resources)

        if query is None:
            query = {}

        resources = self.augment(self.source.resources(query))
        self._cache.save(key, resources)
        return self.filter_resources(resources)

    def _get_cached_resources(self, ids):
        key = self.get_cache_key(None)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug("Using cached results for get_resources")
                m = self.get_model()
                id_set = set(ids)
                return [r for r in resources if r[m.id] in id_set]
        return None

    def get_resources(self, ids, cache=True, augment=True):
        if cache:
            resources = self._get_cached_resources(ids)
            if resources is not None:
                return resources
        try:
            resources = self.source.get_resources(ids)
            if augment:
                resources = self.augment(resources)
            return resources
        except ClientError as e:
            self.log.warning("event ids not resolved: %s error:%s" % (ids, e))
            return []

    def augment(self, resources):
        """subclasses may want to augment resources with additional information.

        ie. we want tags by default (rds, elb), and policy, location, acl for
        s3 buckets.
        """
        return self.source.augment(resources)

    @property
    def account_id(self):
        """ Return the current account ID.

        This should now be passed in using the --account-id flag, but for a
        period of time we will support the old behavior of inferring this from
        IAM.
        """
        return self.config.account_id

    def get_arns(self, resources):
        arns = []
        for r in resources:
            _id = r[self.get_model().id]
            if 'arn' in _id[:3]:
                arns.append(_id)
            else:
                arns.append(self.generate_arn(_id))
        return arns

    @property
    def generate_arn(self):
        """ Generates generic arn if ID is not already arn format.
        """
        if self._generate_arn is None:
            self._generate_arn = functools.partial(
                generate_arn,
                self.get_model().service,
                region=self.config.region,
                account_id=self.account_id,
                resource_type=self.get_model().type,
                separator='/')
        return self._generate_arn
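A hedged sketch of the ARN handling in get_arns() above: an id that already begins with "arn" is used verbatim, anything else is passed to the generate_arn partial (the exact output of generate_arn lives in c7n.utils and is only assumed here):

ids = ['arn:aws:rds:us-east-1:111122223333:snapshot:db-snap', 'vol-0123456789abcdef0']
for _id in ids:
    if 'arn' in _id[:3]:
        print('already an arn:', _id)
    else:
        # generate_arn(_id) would presumably yield something like
        # arn:aws:<service>:<region>:<account>:<resource_type>/vol-0123456789abcdef0
        print('needs generate_arn:', _id)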
Example #45
    def process(self, volumes):
        client = local_session(self.manager.session_factory).client('ec2')
        retry = get_retry(['Throttled'], max_attempts=5)
        for vol in volumes:
            vol_id = vol['VolumeId']
            retry(client.create_snapshot, VolumeId=vol_id)
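Example #45 is the smallest illustration of the calling convention used throughout this listing: get_retry(codes, ...) returns a wrapper that is handed the client method plus its keyword arguments. A hedged, self-contained sketch of that behavior with a stand-in callable (the import path c7n.utils and the retry-until-success semantics are inferred from these examples, not quoted from the library source):

from botocore.exceptions import ClientError
from c7n.utils import get_retry

retry = get_retry(('Throttled',), max_attempts=5)
state = {'calls': 0}

def flaky_create_snapshot(**kwargs):
    # Fails twice with a retryable code, then succeeds.
    state['calls'] += 1
    if state['calls'] < 3:
        raise ClientError(
            {'Error': {'Code': 'Throttled', 'Message': 'slow down'}},
            'CreateSnapshot')
    return {'SnapshotId': 'snap-0123456789abcdef0'}

print(retry(flaky_create_snapshot, VolumeId='vol-0123456789abcdef0'))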
Example #46
class ConfigSource(object):

    retry = staticmethod(get_retry(('ThrottlingException', )))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        return [
            "config:GetResourceConfigHistory", "config:ListDiscoveredResources"
        ]

    def get_resources(self, ids, cache=True):
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(client.get_resource_config_history,
                                   resourceId=i,
                                   resourceType=m.config_type,
                                   limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        return list(filter(None, results))

    def load_resource(self, item):
        if isinstance(item['configuration'], six.string_types):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        return camelResource(item_config)

    def resources(self, query=None):
        client = local_session(self.manager.session_factory).client('config')
        paginator = client.get_paginator('list_discovered_resources')
        pages = paginator.paginate(
            resourceType=self.manager.get_model().config_type)
        results = []

        with self.manager.executor_factory(max_workers=5) as w:
            ridents = pager(pages, self.retry)
            resource_ids = [
                r['resourceId'] for r in ridents.get('resourceIdentifiers', ())
            ]
            self.manager.log.debug("querying %d %s resources",
                                   len(resource_ids),
                                   self.manager.__class__.__name__.lower())

            futures = []
            for resource_set in chunks(resource_ids, 50):
                futures.append(w.submit(self.get_resources, resource_set))
            for f in as_completed(futures):
                if f.exception():
                    self.manager.log.error(
                        "Exception getting resources from config \n %s" %
                        (f.exception()))
                    continue
                results.extend(f.result())
        return results

    def augment(self, resources):
        return resources
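A short sketch of the branch inside load_resource() above: the config item's 'configuration' field may arrive as a JSON string or as an already-parsed mapping, and both paths feed camelResource(), which (as used here) reshapes the keys to match the casing of the service's describe output. The item below is invented for illustration:

import json

item = {'configuration': json.dumps({'volumeId': 'vol-0abc', 'state': 'in-use'})}

config = item['configuration']
item_config = json.loads(config) if isinstance(config, str) else config
print(item_config)   # {'volumeId': 'vol-0abc', 'state': 'in-use'}
# load_resource() would return camelResource(item_config) from here.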
Example #47
class ConfigSource(object):

    retry = staticmethod(get_retry(('ThrottlingException', )))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        return [
            "config:GetResourceConfigHistory", "config:ListDiscoveredResources"
        ]

    def get_resources(self, ids, cache=True):
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(client.get_resource_config_history,
                                   resourceId=i,
                                   resourceType=m.config_type,
                                   limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        return list(filter(None, results))

    def get_query_params(self, query):
        """Parse config select expression from policy and parameter.

        On policy config supports a full statement being given, or
        a clause that will be added to the where expression.

        If no query is specified, a default query is utilized.

        A valid query should at minimum select fields
        for configuration, supplementaryConfiguration and
        must have resourceType qualifier.
        """
        if query and not isinstance(query, dict):
            raise PolicyExecutionError("invalid config source query %s" %
                                       (query, ))

        if query is None and 'query' in self.manager.data:
            _q = [q for q in self.manager.data['query'] if 'expr' in q]
            if _q:
                query = _q.pop()

        if query is None and 'query' in self.manager.data:
            _c = [
                q['clause'] for q in self.manager.data['query']
                if 'clause' in q
            ]
            if _c:
                _c = _c.pop()
        elif query:
            return query
        else:
            _c = None

        s = "select configuration, supplementaryConfiguration where resourceType = '{}'".format(
            self.manager.resource_type.config_type)

        if _c:
            s += "AND {}".format(_c)

        return {'expr': s}

    def load_resource(self, item):
        if isinstance(item['configuration'], six.string_types):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        return camelResource(item_config)

    def resources(self, query=None):
        client = local_session(self.manager.session_factory).client('config')
        query = self.get_query_params(query)
        pager = Paginator(
            client.select_resource_config, {
                'input_token': 'NextToken',
                'output_token': 'NextToken',
                'result_key': 'Results'
            },
            client.meta.service_model.operation_model('SelectResourceConfig'))
        pager.PAGE_ITERATOR_CLS = RetryPageIterator

        results = []
        for page in pager.paginate(Expression=query['expr']):
            results.extend(
                [self.load_resource(json.loads(r)) for r in page['Results']])
        return results

    def augment(self, resources):
        return resources
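The docstring of get_query_params() above describes two policy-side query forms. A hedged sketch of the manager.data shapes it would read, with the expressions themselves purely illustrative:

# Full expression supplied by the policy: the {'expr': ...} dict is returned as-is.
data_with_expr = {
    'query': [{'expr': (
        "select configuration, supplementaryConfiguration "
        "where resourceType = 'AWS::EC2::Volume'")}]}

# Clause supplied by the policy: ANDed onto the default select statement.
data_with_clause = {
    'query': [{'clause': "configuration.state.value = 'in-use'"}]}

# With neither present, the default is used:
#   select configuration, supplementaryConfiguration
#   where resourceType = '<resource_type.config_type>'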
Example #48
class QueryResourceManager(ResourceManager):

    resource_type = ""

    # TODO Check if we can move to describe source
    max_workers = 3
    chunk_size = 20

    permissions = ()

    _generate_arn = None

    retry = staticmethod(
        get_retry(('ThrottlingException', 'RequestLimitExceeded', 'Throttled',
                   'Throttling', 'Client.RequestLimitExceeded')))

    def __init__(self, data, options):
        super(QueryResourceManager, self).__init__(data, options)
        self.source = self.get_source(self.source_type)

    @property
    def source_type(self):
        return self.data.get('source', 'describe')

    def get_source(self, source_type):
        return sources.get(source_type)(self)

    @classmethod
    def has_arn(cls):
        if getattr(cls.resource_type, 'arn', None):
            return True
        elif getattr(cls.resource_type, 'type', None) is not None:
            return True
        elif cls.__dict__.get('get_arns'):
            return True
        return False

    @classmethod
    def get_model(cls):
        return ResourceQuery.resolve(cls.resource_type)

    @classmethod
    def match_ids(cls, ids):
        """return ids that match this resource type's id format."""
        id_prefix = getattr(cls.get_model(), 'id_prefix', None)
        if id_prefix is not None:
            return [i for i in ids if i.startswith(id_prefix)]
        return ids

    def get_permissions(self):
        perms = self.source.get_permissions()
        if getattr(self, 'permissions', None):
            perms.extend(self.permissions)
        return perms

    def get_cache_key(self, query):
        return {
            'account': self.account_id,
            'region': self.config.region,
            'resource': str(self.__class__.__name__),
            'source': self.source_type,
            'q': query
        }

    def resources(self, query=None):
        query = self.source.get_query_params(query)
        cache_key = self.get_cache_key(query)
        resources = None

        if self._cache.load():
            resources = self._cache.get(cache_key)
            if resources is not None:
                self.log.debug(
                    "Using cached %s: %d" %
                    ("%s.%s" %
                     (self.__class__.__module__, self.__class__.__name__),
                     len(resources)))

        if resources is None:
            if query is None:
                query = {}
            with self.ctx.tracer.subsegment('resource-fetch'):
                resources = self.source.resources(query)
            with self.ctx.tracer.subsegment('resource-augment'):
                resources = self.augment(resources)
            self._cache.save(cache_key, resources)

        resource_count = len(resources)
        with self.ctx.tracer.subsegment('filter'):
            resources = self.filter_resources(resources)

        # Check if we're over the policy's execution limits.
        if self.data == self.ctx.policy.data:
            self.check_resource_limit(len(resources), resource_count)
        return resources

    def check_resource_limit(self, selection_count, population_count):
        """Check if policy's execution affects more resources then its limit.

        Ideally this would be at a higher level but we've hidden
        filtering behind the resource manager facade for default usage.
        """
        p = self.ctx.policy
        max_resource_limits = MaxResourceLimit(p, selection_count,
                                               population_count)
        return max_resource_limits.check_resource_limits()

    def _get_cached_resources(self, ids):
        key = self.get_cache_key(None)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug("Using cached results for get_resources")
                m = self.get_model()
                id_set = set(ids)
                return [r for r in resources if r[m.id] in id_set]
        return None

    def get_resources(self, ids, cache=True, augment=True):
        if cache:
            resources = self._get_cached_resources(ids)
            if resources is not None:
                return resources
        try:
            resources = self.source.get_resources(ids)
            if augment:
                resources = self.augment(resources)
            return resources
        except ClientError as e:
            self.log.warning("event ids not resolved: %s error:%s" % (ids, e))
            return []

    def augment(self, resources):
        """subclasses may want to augment resources with additional information.

        ie. we want tags by default (rds, elb), and policy, location, acl for
        s3 buckets.
        """
        return self.source.augment(resources)

    @property
    def account_id(self):
        """ Return the current account ID.

        This should now be passed in using the --account-id flag, but for a
        period of time we will support the old behavior of inferring this from
        IAM.
        """
        return self.config.account_id

    @property
    def region(self):
        """ Return the current region.
        """
        return self.config.region

    def get_arns(self, resources):
        arns = []

        m = self.get_model()
        arn_key = getattr(m, 'arn', None)
        if arn_key is False:
            raise ValueError("%s do not have arns" % self.type)

        id_key = m.id

        for r in resources:
            _id = r[id_key]
            if arn_key:
                arns.append(r[arn_key])
            elif 'arn' in _id[:3]:
                arns.append(_id)
            else:
                arns.append(self.generate_arn(_id))
        return arns

    @property
    def generate_arn(self):
        """ Generates generic arn if ID is not already arn format.
        """
        if self._generate_arn is None:
            self._generate_arn = functools.partial(
                generate_arn,
                self.get_model().service,
                region=self.config.region,
                account_id=self.config.account_id,
                resource_type=self.get_model().type,
                separator='/')
        return self._generate_arn
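A small sketch of match_ids() above, assuming a resource model whose id_prefix is 'vol-' (EBS volumes are used purely for illustration):

ids = ['vol-0abc1234', 'i-0def5678', 'vol-0aaa9999']
id_prefix = 'vol-'
print([i for i in ids if i.startswith(id_prefix)])
# ['vol-0abc1234', 'vol-0aaa9999']; ids that don't match this resource
# type's id format are dropped.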
Example #49
    def process_image(self, image):
        retry = get_retry((
            'RequestLimitExceeded', 'Client.RequestLimitExceeded'))

        client = local_session(self.manager.session_factory).client('ec2')
        retry(client.deregister_image, ImageId=image['ImageId'])
Example #50
def export(group, bucket, prefix, start, end, role, session=None):
    """Export a single log group to s3."""
    start = start and isinstance(start, basestring) and parse(start) or start
    end = (end and isinstance(end, basestring) and parse(end) or end
           or datetime.now())
    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')
    retry = get_retry(('LimitExceededException', ), min_delay=4)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'),
                            group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName'].strip('/')

    log.debug("Log exporting group:%s start:%s end:%s bucket:%s prefix:%s",
              group, start.strftime('%Y/%m/%d'), end.strftime('%Y/%m/%d'),
              bucket, prefix)

    t = time.time()
    days = [start + timedelta(i) for i in range((end - start).days)]
    day_count = len(days)
    days = filter_extant_exports(boto3.Session().client('s3'), bucket, prefix,
                                 days, start, end)

    log.debug("Filtering s3 extant keys from %d to %d in %0.2f", day_count,
              len(days),
              time.time() - t)
    t = time.time()

    for idx, d in enumerate(days):
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName':
            "%s-%s" % ("c7n-log-exporter", date.strftime("%Y-%m-%d")),
            'logGroupName':
            group['logGroupName'],
            'fromTime':
            int(
                time.mktime(
                    date.replace(minute=0, microsecond=0, hour=0).timetuple())
                * 1000),
            'to':
            int(
                time.mktime(
                    date.replace(minute=59, hour=23,
                                 microsecond=0).timetuple()) * 1000),
            'destination':
            bucket,
            'destinationPrefix':
            export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        result = retry(client.create_export_task, **params)
        log.debug("Log export group:%s day:%s bucket:%s prefix:%s task:%s",
                  group, params['taskName'], bucket,
                  params['destinationPrefix'], result['taskId'])

    log.info(("Exported log group:%s time:%0.2f days:%d start:%s"
              " end:%s bucket:%s prefix:%s"), group,
             time.time() - t, idx, start.strftime('%Y/%m/%d'),
             end.strftime('%Y/%m/%d'), bucket, prefix)
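A hedged usage sketch for export() above; the group name, bucket, prefix, and role are placeholders:

group = {'logGroupName': '/aws/lambda/app-fn'}
export(
    group,
    bucket='my-log-archive',
    prefix='cloudwatch',
    start='2018/01/01',
    end='2018/02/01',
    role='arn:aws:iam::111122223333:role/log-exporter')
# One create_export_task call is issued per day not already present under
# s3://my-log-archive/cloudwatch/..., retried on LimitExceededException
# because CloudWatch Logs only allows one active export task at a time.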