def process_asg(self, asg):
    """Stop an auto scaling group in two steps.

    The group's scaling processes are suspended first; afterwards every
    instance currently listed on the group is stopped.

    Returns None in all non-raising cases.
    """
    session = local_session(self.manager.session_factory)
    autoscaling = session.client('autoscaling')
    try:
        self.manager.retry(
            autoscaling.suspend_processes,
            AutoScalingGroupName=asg['AutoScalingGroupName'])
    except ClientError as e:
        # A validation error ends processing of this group quietly.
        if e.response['Error']['Code'] != 'ValidationError':
            raise
        return

    ec2 = session.client('ec2')
    instance_ids = [inst['InstanceId'] for inst in asg['Instances']]
    if not instance_ids:
        return

    throttle_retry = get_retry((
        'RequestLimitExceeded', 'Client.RequestLimitExceeded'))
    try:
        throttle_retry(ec2.stop_instances, InstanceIds=instance_ids)
    except ClientError as e:
        tolerated = ('InvalidInstanceID.NotFound', 'IncorrectInstanceState')
        if e.response['Error']['Code'] not in tolerated:
            raise
        # Missing or wrongly-stated instances are only worth a warning.
        log.warning("Erroring stopping asg instances %s %s" % (
            asg['AutoScalingGroupName'], e))
def process_resource(self, target, key, tags, snapshot):
    """Copy a single DB snapshot using the ``target`` client.

    Args:
        target: client object exposing ``copy_db_snapshot``.
        key: optional KMS key id; only sent when truthy.
        tags: optional tag list; only sent when truthy.
        snapshot: snapshot description dict; on success it gains a
            ``c7n:CopiedSnapshot`` entry holding the copy's ARN.
    """
    params = {}
    if key:
        params['KmsKeyId'] = key
    # ':' is replaced with '-' to build the target identifier.
    target_id = snapshot['DBSnapshotIdentifier'].replace(':', '-')
    params['TargetDBSnapshotIdentifier'] = target_id
    params['SourceRegion'] = self.manager.config.region
    params['SourceDBSnapshotIdentifier'] = snapshot['DBSnapshotArn']
    if self.data.get('copy_tags', True):
        params['CopyTags'] = True
    if tags:
        params['Tags'] = tags

    # TODO make this configurable, class defaults to 1hr
    quota_retry = get_retry(
        ('SnapshotQuotaExceeded',),
        min_delay=self.min_delay,
        max_attempts=self.max_attempts,
        log_retries=logging.DEBUG)

    try:
        result = quota_retry(target.copy_db_snapshot, **params)
    except ClientError as e:
        if e.response['Error']['Code'] != 'DBSnapshotAlreadyExists':
            raise
        self.log.warning(
            "Snapshot %s already exists in target region",
            snapshot['DBSnapshotIdentifier'])
        return

    copied_arn = result['DBSnapshot']['DBSnapshotArn']
    snapshot['c7n:CopiedSnapshot'] = copied_arn
def __new__(cls, name, parents, attrs):
    """Create a resource class, wiring up default registries and helpers.

    Classes that do not declare ``resource_type`` are created untouched.
    Otherwise a filter registry and an action registry are created when
    the class body did not supply them, and, when ``resource_type`` is
    truthy, generic metrics/tag support is registered based on the
    resolved resource model.
    """
    if 'resource_type' not in attrs:
        return super(QueryMeta, cls).__new__(cls, name, parents, attrs)

    if 'filter_registry' not in attrs:
        attrs['filter_registry'] = FilterRegistry(
            '%s.filters' % name.lower())
    if 'action_registry' not in attrs:
        # The action registry gets its own '.actions' name; it previously
        # reused the '.filters' suffix of the filter registry.
        attrs['action_registry'] = ActionRegistry(
            '%s.actions' % name.lower())

    if attrs['resource_type']:
        m = ResourceQuery.resolve(attrs['resource_type'])
        # Generic cloud watch metrics support
        if m.dimension:
            attrs['filter_registry'].register('metrics', MetricsFilter)
        # EC2 Service boilerplate ...
        if m.service == 'ec2':
            # Generic ec2 retry
            attrs['retry'] = staticmethod(get_retry((
                'RequestLimitExceeded', 'Client.RequestLimitExceeded')))
            # Generic ec2 resource tag support (on unless the model opts out)
            if getattr(m, 'taggable', True):
                register_ec2_tags(
                    attrs['filter_registry'], attrs['action_registry'])
        # Universal tagging is opt-in on the model.
        if getattr(m, 'universal_taggable', False):
            register_universal_tags(
                attrs['filter_registry'], attrs['action_registry'])

    return super(QueryMeta, cls).__new__(cls, name, parents, attrs)
def process_stack(self, stack):
    """Delete an OpsWorks stack together with its apps, instances and layers.

    Apps are deleted first. If any instance is filtered out by
    ``self.filter_instance_state`` the stack is left alone and an error is
    logged. Otherwise instances, layers and finally the stack are deleted.

    Returns:
        True when an instance delete kept failing with a retryable code
        after all retry attempts; None otherwise.
    """
    client = local_session(
        self.manager.session_factory).client('opsworks')
    # Validation Exception raised for instances that are stopping when
    # delete is called. Built once, it does not vary per instance.
    retryable = ('ValidationException',)
    retry = utils.get_retry(retryable, max_attempts=8)
    try:
        stack_id = stack['StackId']
        for app in client.describe_apps(StackId=stack_id)['Apps']:
            client.delete_app(AppId=app['AppId'])

        instances = client.describe_instances(StackId=stack_id)['Instances']
        orig_length = len(instances)
        instances = self.filter_instance_state(instances)
        if len(instances) != orig_length:
            # No exception is active here, so log.error is used;
            # log.exception would append a meaningless empty traceback.
            self.log.error(
                "All instances must be stopped before deletion. "
                "Stack Id: %s Name: %s.", stack_id, stack['Name'])
            return

        for instance in instances:
            instance_id = instance['InstanceId']
            try:
                retry(client.delete_instance, InstanceId=instance_id)
            except ClientError as e2:
                if e2.response['Error']['Code'] in retryable:
                    return True
                raise

        for layer in client.describe_layers(StackId=stack_id)['Layers']:
            client.delete_layer(LayerId=layer['LayerId'])
        client.delete_stack(StackId=stack_id)
    except ClientError as e:
        self.log.exception(
            "Exception deleting stack:\n %s" % e)
def __init__(self, log_group=__name__, log_stream=None,
             session_factory=None):
    """Set up handler state and make sure the log group exists.

    Args:
        log_group: name of the log group to write to.
        log_stream: optional log stream name.
        session_factory: callable returning a session; defaults to
            ``boto3.Session``.
    """
    super(CloudWatchLogHandler, self).__init__()
    self.log_group = log_group
    self.log_stream = log_stream
    self.session_factory = session_factory or boto3.Session
    self.transport = None
    self.queue = Queue.Queue()
    self.threads = []
    # Records are buffered locally before being handed to the transport,
    # which keeps queue/threading overhead down.
    self.buf = []
    self.last_seen = time.time()
    # The logging module tracks every handler for cleanup at exit, while
    # custodian scopes shutdown per policy; this sentinel avoids deadlocks.
    self.shutdown = False

    throttle_retry = get_retry(('ThrottlingException',))
    try:
        client = self.session_factory().client('logs')
        response = throttle_retry(
            client.describe_log_groups,
            logGroupNamePrefix=self.log_group)
        existing = response['logGroups']
        # The prefix query may return other groups, so match exactly.
        if not any(
                grp['logGroupName'] == self.log_group for grp in existing):
            throttle_retry(
                client.create_log_group, logGroupName=self.log_group)
    except ClientError as e:
        if Error.code(e) != Error.ResourceExists:
            raise
def test_retry_passthrough(self):
    """The retry wrapper hands back the wrapped callable's result."""
    def answer():
        return 42

    passthrough = utils.get_retry((), 5)
    outcome = passthrough(answer)
    self.assertEqual(outcome, 42)
def assumed_session(role_arn, session_name, session=None, region=None,
                    external_id=None):
    """Build a boto3 Session backed by an assumed STS role.

    The credentials renew themselves automatically.

    Args:
        role_arn: iam role arn to assume
        session_name: client session identifier
        session: an optional existing session; it is captured in the
            refresh closure and reused each time the role is re-assumed.
        region: optional region for the new session.
        external_id: optional external id sent with the assume-role call.

    :return: a boto3 session using the sts assumed role credentials

    Notes: this reaches into botocore internals in a couple of places.
    """
    if session is None:
        session = Session()

    retry = get_retry(('Throttling',))

    def refresh():
        assume_args = {
            "RoleArn": role_arn,
            "RoleSessionName": session_name}
        if external_id is not None:
            assume_args['ExternalId'] = external_id

        response = retry(session.client('sts').assume_role, **assume_args)
        creds = response['Credentials']
        return {
            'access_key': creds['AccessKeyId'],
            'secret_key': creds['SecretAccessKey'],
            'token': creds['SessionToken'],
            # Stringified here only so it can be parsed again downstream.
            'expiry_time': creds['Expiration'].isoformat(),
        }

    refreshable = RefreshableCredentials.create_from_metadata(
        metadata=refresh(),
        refresh_using=refresh,
        method='sts-assume-role')

    # There is no clean public way to install these credentials, so the
    # private attribute on the botocore session is set directly.
    # https://github.com/boto/boto3/issues/443
    # https://github.com/boto/botocore/issues/761
    core_session = get_session()
    core_session._credentials = refreshable
    if region is None:
        region = core_session.get_config_variable('region') or 'us-east-1'
    core_session.set_config_variable('region', region)
    return Session(botocore_session=core_session)
def process_instance_set(self, client, instances):
    """Reboot a set of instances, retrying on capacity and rate-limit errors.

    Args:
        client: client object exposing ``reboot_instances``.
        instances: iterable of dicts carrying an ``InstanceId`` key.

    Returns:
        True when the call still failed with one of the retryable codes
        after the retry wrapper gave up; None on success.

    Raises:
        ClientError: for any error code outside the retryable set.
    """
    # Setup retry with insufficient capacity as well. This must be a flat
    # tuple of codes: a trailing comma here used to wrap it in a second
    # tuple, so no error code string could ever match.
    retryable = (
        'InsufficientInstanceCapacity',
        'RequestLimitExceeded',
        'Client.RequestLimitExceeded')
    retry = utils.get_retry(retryable, max_attempts=5)
    instance_ids = [i['InstanceId'] for i in instances]
    try:
        retry(client.reboot_instances, InstanceIds=instance_ids)
    except ClientError as e:
        if e.response['Error']['Code'] in retryable:
            return True
        raise
def resume_asg_instances(self, asg):
    """Start every instance listed on the given auto scaling group.

    Does nothing when the group lists no instances.
    """
    ec2 = local_session(self.manager.session_factory).client('ec2')
    instance_ids = [inst['InstanceId'] for inst in asg['Instances']]
    if instance_ids:
        throttle_retry = get_retry((
            'RequestLimitExceeded', 'Client.RequestLimitExceeded'))
        throttle_retry(ec2.start_instances, InstanceIds=instance_ids)
def process(self, resources):
    """Deregister each task definition in ``resources``.

    A definition that no longer exists is ignored; every other client
    error propagates.
    """
    # No error code for not found, so the message text is matched instead.
    missing_message = 'The specified task definition does not exist.'
    ecs = local_session(self.manager.session_factory).client('ecs')
    throttle_retry = get_retry(('Throttling',))
    for task_def in resources:
        try:
            throttle_retry(
                ecs.deregister_task_definition,
                taskDefinition=task_def['taskDefinitionArn'])
        except ClientError as e:
            if e.response['Error']['Message'] == missing_message:
                continue
            raise
def process_snapshot_set(self, client, snapshots_set):
    """Delete the given snapshots, skipping those in ``self.image_snapshots``.

    Snapshots that have already disappeared are ignored; any other client
    error propagates.
    """
    throttle_retry = get_retry((
        'RequestLimitExceeded', 'Client.RequestLimitExceeded'))
    for snap in snapshots_set:
        if snap['SnapshotId'] not in self.image_snapshots:
            try:
                throttle_retry(
                    client.delete_snapshot,
                    SnapshotId=snap['SnapshotId'],
                    DryRun=self.manager.config.dryrun)
            except ClientError as e:
                if e.response['Error']['Code'] != "InvalidSnapshot.NotFound":
                    raise
def process(self, resources):
    """Stop each task in ``resources``.

    The stop reason comes from the action's ``reason`` setting and
    defaults to 'custodian policy'. A task that can no longer be found is
    ignored; other client errors propagate.
    """
    # No error code for not found, so the message text is matched instead.
    missing_message = "The referenced task was not found."
    ecs = local_session(self.manager.session_factory).client('ecs')
    throttle_retry = get_retry(('Throttling',))
    stop_reason = self.data.get('reason', 'custodian policy')
    for task in resources:
        try:
            throttle_retry(
                ecs.stop_task,
                cluster=task['clusterArn'],
                task=task['taskArn'],
                reason=stop_reason)
        except ClientError as e:
            if e.response['Error']['Message'] == missing_message:
                continue
            raise
def test_retry_errors(self):
    """A call that always fails with a retryable code is tried 5 times."""
    # Neutralise sleeping so the retries run without delay.
    self.patch(time, 'sleep', lambda x: x)
    self.count = 0

    def always_fails():
        self.count += 1
        raise ClientError({'Error': {'Code': 42}}, 'something')

    retry = utils.get_retry((42,), 5)
    raised = False
    try:
        retry(always_fails)
    except ClientError:
        raised = True

    if not raised:
        self.fail("should have raised")
    self.assertEqual(self.count, 5)
def test_retry_errors(self):
    """Retrying stops after five attempts and re-raises the client error."""
    # Replace sleep with a no-op so back-off does not slow the test.
    self.patch(time, "sleep", lambda x: x)
    self.count = 0

    def failing_call():
        self.count += 1
        raise ClientError({"Error": {"Code": 42}}, "something")

    wrapped = utils.get_retry((42,), 5)
    error_seen = False
    try:
        wrapped(failing_call)
    except ClientError:
        error_seen = True

    if error_seen:
        self.assertEqual(self.count, 5)
    else:
        self.fail("should have raised")
def process(self, resources):
    """Delete each service, scaling it to zero first when needed.

    The last deployment whose status is 'PRIMARY' decides whether the
    desired count has to be set to 0 before the delete call. Services
    that no longer exist are ignored.
    """
    ecs = local_session(self.manager.session_factory).client('ecs')
    throttle_retry = get_retry(('Throttling',))
    for svc in resources:
        try:
            primaries = [
                dep for dep in svc['deployments']
                if dep['status'] == 'PRIMARY']
            # Indexing an empty list raises IndexError, as before.
            primary = primaries[-1]
            if primary['desiredCount'] > 0:
                throttle_retry(
                    ecs.update_service,
                    cluster=svc['clusterArn'],
                    service=svc['serviceName'],
                    desiredCount=0)
            throttle_retry(
                ecs.delete_service,
                cluster=svc['clusterArn'],
                service=svc['serviceName'])
        except ClientError as e:
            if e.response['Error']['Code'] == 'ServiceNotFoundException':
                continue
            raise
def process_instance_set(self, client, instances, itype, izone):
    """Start a set of instances that share an instance type and zone.

    Insufficient capacity is logged and swallowed; every other client
    error is logged and re-raised. ``itype`` and ``izone`` are only used
    in the log message.
    """
    # Setup retry with insufficient capacity as well
    retry_codes = (
        'InsufficientInstanceCapacity',
        'RequestLimitExceeded',
        'Client.RequestLimitExceeded')
    retry = utils.get_retry(retry_codes, max_attempts=5)
    instance_ids = [inst['InstanceId'] for inst in instances]
    try:
        retry(client.start_instances, InstanceIds=instance_ids)
    except ClientError as e:
        if e.response['Error']['Code'] != 'InsufficientInstanceCapacity':
            self.log.exception("Error while starting instances error %s", e)
            raise
        self.log.exception(
            ("Could not start instances:%d type:%s"
             " zone:%s instances:%s error:%s"),
            len(instances), itype, izone,
            ", ".join(instance_ids), e)
def filter_last_write(client, groups, start):
    """Return the log groups whose most recent stream is newer than ``start``.

    For each group only the stream with the latest event time is fetched.
    The group is kept when that stream is empty but was created after
    ``start``, or when its last ingestion time is after ``start``.

    Args:
        client: client exposing ``describe_log_streams``.
        groups: sequence of dicts with a ``logGroupName`` key.
        start: datetime compared against ``datetime.fromtimestamp`` values,
            so presumably naive local time -- confirm with callers.

    Returns:
        list of matching group dicts, in completion order of the workers.

    Raises:
        Any exception raised while processing a chunk; it is logged first
        and then re-raised by ``f.result()``.
    """
    retry = get_retry(('ThrottlingException',))

    def process_group(group_set):
        matched = []
        for g in group_set:
            streams = retry(
                client.describe_log_streams,
                logGroupName=g['logGroupName'],
                orderBy='LastEventTime',
                limit=1, descending=True)
            if not streams.get('logStreams'):
                continue
            stream = streams['logStreams'][0]
            # Timestamps are divided by 1000 before conversion
            # (presumably epoch milliseconds).
            if stream['storedBytes'] == 0 and datetime.fromtimestamp(
                    stream['creationTime'] / 1000) > start:
                matched.append(g)
            elif 'lastIngestionTime' in stream and datetime.fromtimestamp(
                    stream['lastIngestionTime'] / 1000) > start:
                matched.append(g)
        return matched

    results = []

    with ThreadPoolExecutor(max_workers=3) as w:
        futures = {}
        for group_set in chunks(groups, 10):
            futures[w.submit(process_group, group_set)] = group_set

        for f in as_completed(futures):
            if f.exception():
                # Report the chunk that belongs to this future, not the
                # leftover loop variable from the submission loop above.
                log.error(
                    "Error processing groupset:%s error:%s",
                    futures[f],
                    f.exception())
            results.extend(f.result())

    return results
def get_related_ids(self, resources):
    """Collect the security group ids attached to the given mount targets.

    When ``self.efs_group_cache`` is already populated it is consulted
    instead of calling the service. Otherwise each mount target is looked
    up and the per-target results are stored in ``self.efs_group_cache``.

    Returns:
        list of unique security group ids.
    """
    unique_ids = set()

    if self.efs_group_cache:
        for mount in resources:
            cached = self.efs_group_cache.get(mount['MountTargetId'], ())
            unique_ids.update(cached)
        return list(unique_ids)

    efs = local_session(self.manager.session_factory).client('efs')
    throttle_retry = get_retry(('Throttled',), 12)
    per_target = {}
    for mount in resources:
        target_id = mount['MountTargetId']
        response = throttle_retry(
            efs.describe_mount_target_security_groups,
            MountTargetId=target_id)
        per_target[target_id] = response['SecurityGroups']
        unique_ids.update(per_target[target_id])

    self.efs_group_cache = per_target
    return list(unique_ids)
from datetime import datetime, timedelta
from dateutil.parser import parse
from dateutil.tz import tzutc
import itertools

from c7n.actions import BaseAction as Action, AutoTagUser
from c7n.filters import Filter, OPERATORS, FilterValidationError
from c7n import utils

# Default tag key used by this module.
DEFAULT_TAG = "maid_status"

# Shared retry wrapper built for throttling / request-limit error codes.
universal_tag_retry = utils.get_retry((
    'Throttled',
    'RequestLimitExceeded',
    'Client.RequestLimitExceeded'
))


def register_ec2_tags(filters, actions):
    """Register the tag filters and tag actions on the given registries.

    Args:
        filters: registry exposing ``register(name, klass)`` for filters.
        actions: registry exposing ``register(name, klass)`` for actions.
    """
    filters.register('marked-for-op', TagActionFilter)
    filters.register('tag-count', TagCountFilter)

    actions.register('auto-tag-user', AutoTagUser)
    actions.register('mark-for-op', TagDelayedAction)
    actions.register('tag-trim', TagTrim)

    # 'mark' and 'tag' are two names for the same action class.
    actions.register('mark', Tag)
    actions.register('tag', Tag)
}, client.meta.service_model.operation_model('ListProtections')) def get_type_protections(client, model): pager = get_protections_paginator(client) pager.PAGE_ITERATOR_CLS = RetryPageIterator try: protections = pager.paginate().build_full_result().get( 'Protections', []) except client.exceptions.ResourceNotFoundException: # shield is not enabled in the account, so all resources are not protected return [] return [p for p in protections if model.arn_type in p['ResourceArn']] ShieldRetry = get_retry(('ThrottlingException', )) class IsShieldProtected(Filter): permissions = ('shield:ListProtections', ) schema = type_schema('shield-enabled', state={'type': 'boolean'}) def process(self, resources, event=None): client = local_session(self.manager.session_factory).client( 'shield', region_name='us-east-1') protections = get_type_protections(client, self.manager.get_model()) protected_resources = {p['ResourceArn'] for p in protections} state = self.data.get('state', False)
# limitations under the License. """ S3 Key Encrypt on Bucket Changes """ from __future__ import absolute_import, division, print_function, unicode_literals import json import urllib import boto3 from botocore.exceptions import ClientError from c7n.resources.s3 import EncryptExtantKeys from c7n.utils import get_retry s3 = config = None retry = get_retry(['404', '503'], max_attempts=4, min_delay=2) def init(): global s3, config if s3 is not None: return s3 = boto3.client('s3') with open('config.json') as fh: config = json.load(fh) # multipart copy can on multigb file can take a long time config['large'] = False def process_key_event(event, context):
def export(group, bucket, prefix, start, end, role, session=None):
    """Create one CloudWatch Logs export task per day for a log group.

    Args:
        group: log group description dict (needs ``logGroupName``).
        bucket: destination bucket name.
        prefix: optional key prefix; the group name is appended to it. When
            empty the group name alone is used.
        start: datetime or date string marking the first day.
        end: datetime or date string; falls back to ``datetime.now()``.
        role: passed to ``get_session`` when no session is supplied.
        session: optional pre-built session.
    """
    # Each bound is parsed on its own. The old code tested ``start`` when
    # deciding whether to parse ``end``, so a string ``end`` was never parsed.
    if start and isinstance(start, basestring):
        start = parse(start)
    if end and isinstance(end, basestring):
        end = parse(end)
    end = end or datetime.now()

    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')
    retry = get_retry(('LimitExceededException',), min_delay=4)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'),
                            group['logGroupName'].strip('/'))
    else:
        # Use the group's name, not the whole description dict.
        prefix = group['logGroupName']

    log.debug("Log exporting group:%s start:%s end:%s bucket:%s prefix:%s",
              group,
              start.strftime('%Y/%m/%d'),
              end.strftime('%Y/%m/%d'),
              bucket,
              prefix)

    t = time.time()
    days = [start + timedelta(i) for i in range((end - start).days)]
    day_count = len(days)
    days = filter_extant_exports(
        boto3.Session().client('s3'), bucket, prefix, days, start, end)

    log.debug("Filtering s3 extant keys from %d to %d in %0.2f",
              day_count, len(days), time.time() - t)
    t = time.time()

    for d in days:
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        # NOTE(review): the window runs from 00:00 to 23:59:00 with seconds
        # left untouched on ``fromTime`` -- confirm the final minute of the
        # day is meant to be excluded.
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        result = retry(client.create_export_task, **params)
        log.debug("Log export group:%s day:%s bucket:%s prefix:%s task:%s",
                  group,
                  params['taskName'],
                  bucket,
                  params['destinationPrefix'],
                  result['taskId'])

    # len(days) replaces the leaked loop index, which was unbound for an
    # empty day list and one short of the real count otherwise.
    log.info(("Exported log group:%s time:%0.2f days:%d start:%s"
              " end:%s bucket:%s prefix:%s"),
             group,
             time.time() - t,
             len(days),
             start.strftime('%Y/%m/%d'),
             end.strftime('%Y/%m/%d'),
             bucket,
             prefix)
{"required": ['role']} ], 'required': ['name', 'bucket', 'regions', 'title', 'id'], 'properties': { 'name': {'type': 'string'}, 'title': {'type': 'string'}, 'tags': {'type': 'object'}, 'bucket': {'type': 'string'}, 'regions': {'type': 'array', 'items': {'type': 'string'}} } } } } } retry = get_retry(('Throttling',), log_retries=True) indexers = PluginRegistry('policy-metrics-indexers') class Indexer(object): """ Metrics indexer """ def get_indexer(config, **kwargs): itype = config['indexer']['type'] klass = indexers.get(itype) return klass(config, **kwargs)
def process(self, volumes):
    """Create a snapshot of every volume in ``volumes``."""
    ec2 = local_session(self.manager.session_factory).client('ec2')
    throttle_retry = get_retry(['Throttled'], max_attempts=5)
    for volume in volumes:
        throttle_retry(ec2.create_snapshot, VolumeId=volume['VolumeId'])
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ S3 Key Encrypt on Bucket Changes """ import json import boto3 from botocore.exceptions import ClientError from c7n.resources.s3 import EncryptExtantKeys from c7n.utils import get_retry s3 = config = None retry = get_retry(['404', '503']) def init(): global s3, config if s3 is not None: return s3 = boto3.client('s3') with open('config.json') as fh: config = json.load(fh) # multipart copy can on multigb file can take a long time config['large'] = False def process_key_event(event, context):
def process(self, resources):
    """Unmount and then delete each file system in ``resources``.

    Deletion is retried while the service reports 'FileSystemInUse'.
    """
    efs = local_session(self.manager.session_factory).client('efs')
    self.unmount_filesystems(resources)
    in_use_retry = get_retry(('FileSystemInUse',), 12)
    for file_system in resources:
        in_use_retry(
            efs.delete_file_system,
            FileSystemId=file_system['FileSystemId'])
def export(group, bucket, prefix, start, end, role, poll_period=120,
           session=None, name=""):
    """export a given log group to s3

    One export task is created per day between ``start`` and ``end``. When
    the service answers ``LimitExceededException`` the call is retried
    after sleeping ``poll_period`` seconds. After each task is created the
    marker object at ``prefix`` is tagged with the exported day.

    Args:
        group: name of the log group to export.
        bucket: destination bucket name.
        prefix: optional key prefix; the group name is appended to it.
        start: datetime or date string marking the first day.
        end: datetime or date string; falls back to ``datetime.now()``.
        role: passed to ``get_session`` when no session is supplied.
        poll_period: seconds to sleep between export task attempts.
        session: optional pre-built session.
        name: label prepended to the group name in log messages.

    Raises:
        ValueError: when the log group cannot be found.
    """
    # Each bound is parsed on its own. The old code tested ``start`` when
    # deciding whether to parse ``end``, so a string ``end`` was never parsed.
    if start and isinstance(start, six.string_types):
        start = parse(start)
    if end and isinstance(end, six.string_types):
        end = parse(end)
    end = end or datetime.now()

    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')

    # Initialised up front so an empty page sequence reports "not found"
    # instead of raising NameError.
    found = False
    paginator = client.get_paginator('describe_log_groups')
    for p in paginator.paginate():
        for _group in p['logGroups']:
            if _group['logGroupName'] == group:
                group = _group
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError("Log group %s not found." % group)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'),
                            group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName']

    named_group = "%s:%s" % (name, group['logGroupName'])
    log.info(
        "Log exporting group:%s start:%s end:%s bucket:%s prefix:%s size:%s",
        named_group,
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix,
        group['storedBytes'])

    days = [(
        start + timedelta(i)).replace(
            minute=0, hour=0, second=0, microsecond=0)
        for i in range((end - start).days)]
    day_count = len(days)
    s3 = boto3.Session().client('s3')
    days = filter_extant_exports(s3, bucket, prefix, days, start, end)

    log.info("Group:%s filtering s3 extant keys from %d to %d start:%s end:%s",
             named_group, day_count, len(days),
             days[0] if days else '', days[-1] if days else '')

    # Timer for the final summary. It is kept separate from the per-day
    # timer, which used to overwrite it.
    export_started = time.time()

    retry = get_retry(('SlowDown',))

    for d in days:
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        # Make sure the marker object that receives the LastExport tag exists.
        try:
            s3.head_object(Bucket=bucket, Key=prefix)
        except ClientError as e:
            if e.response['Error']['Code'] != '404':  # Not Found
                raise
            s3.put_object(
                Bucket=bucket,
                Key=prefix,
                Body=json.dumps({}),
                ACL="bucket-owner-full-control",
                ServerSideEncryption="AES256")

        day_started = time.time()
        counter = 0
        while True:
            counter += 1
            try:
                result = client.create_export_task(**params)
            except ClientError as e:
                if e.response['Error']['Code'] == 'LimitExceededException':
                    time.sleep(poll_period)
                    # log every sixth poll while waiting on the export
                    if counter % 6 == 0:
                        log.debug(
                            "group:%s day:%s waiting for %0.2f minutes",
                            named_group, d.strftime('%Y-%m-%d'),
                            (counter * poll_period) / 60.0)
                    continue
                raise
            retry(
                s3.put_object_tagging,
                Bucket=bucket, Key=prefix,
                Tagging={
                    'TagSet': [{
                        'Key': 'LastExport',
                        'Value': d.isoformat()}]})
            break

        log.info(
            "Log export time:%0.2f group:%s day:%s bucket:%s prefix:%s task:%s",
            time.time() - day_started,
            named_group,
            d.strftime("%Y-%m-%d"),
            bucket,
            params['destinationPrefix'],
            result['taskId'])

    log.info(
        ("Exported log group:%s time:%0.2f days:%d start:%s"
         " end:%s bucket:%s prefix:%s"),
        named_group,
        time.time() - export_started,
        len(days),
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)
from concurrent.futures import as_completed
from datetime import datetime, timedelta
from dateutil import zoneinfo
from dateutil.parser import parse

import itertools

from c7n.actions import BaseAction as Action, AutoTagUser
from c7n.filters import Filter, OPERATORS, FilterValidationError
from c7n.filters.offhours import Time
from c7n import utils

# Default tag key used by this module.
DEFAULT_TAG = "maid_status"

# Shared retry wrapper built for throttling / request-limit error codes.
universal_tag_retry = utils.get_retry(
    ('Throttled', 'RequestLimitExceeded', 'Client.RequestLimitExceeded'))


def register_ec2_tags(filters, actions):
    """Register the tag filters and tag actions on the given registries.

    Args:
        filters: registry exposing ``register(name, klass)`` for filters.
        actions: registry exposing ``register(name, klass)`` for actions.
    """
    filters.register('marked-for-op', TagActionFilter)
    filters.register('tag-count', TagCountFilter)

    actions.register('auto-tag-user', AutoTagUser)
    actions.register('mark-for-op', TagDelayedAction)
    actions.register('tag-trim', TagTrim)

    # 'mark'/'tag' and 'unmark'/'untag' are alias pairs for one class each.
    actions.register('mark', Tag)
    actions.register('tag', Tag)

    actions.register('unmark', RemoveTag)
    actions.register('untag', RemoveTag)
class ConfigSource:
    """Resource source that loads resources through the config service."""

    # Retry wrapper shared by all instances, keyed on ThrottlingException.
    retry = staticmethod(get_retry(('ThrottlingException', )))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        """Return the permissions this source needs."""
        return [
            "config:GetResourceConfigHistory", "config:ListDiscoveredResources"
        ]

    def get_resources(self, ids, cache=True):
        """Load the latest recorded revision of each resource id.

        Ids without any revision are skipped, and falsy loaded resources
        are dropped. ``cache`` is accepted but not used here.
        """
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(client.get_resource_config_history,
                                   resourceId=i,
                                   resourceType=m.config_type,
                                   limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        return list(filter(None, results))

    def get_query_params(self, query):
        """Parse config select expression from policy and parameter.

        On policy config supports a full statement being given, or
        a clause that will be added to the where expression.

        If no query is specified, a default query is utilized.

        A valid query should at minimum select fields
        for configuration, supplementaryConfiguration and
        must have resourceType qualifier.

        Raises:
            PolicyExecutionError: when ``query`` is truthy but not a dict.
        """
        if query and not isinstance(query, dict):
            raise PolicyExecutionError("invalid config source query %s" %
                                       (query, ))

        # A full expression given on the policy takes precedence.
        if query is None and 'query' in self.manager.data:
            _q = [q for q in self.manager.data['query'] if 'expr' in q]
            if _q:
                query = _q.pop()

        if query is None and 'query' in self.manager.data:
            _c = [
                q['clause'] for q in self.manager.data['query']
                if 'clause' in q
            ]
            if _c:
                _c = _c.pop()
        elif query:
            return query
        else:
            _c = None

        s = "select configuration, supplementaryConfiguration where resourceType = '{}'".format(
            self.manager.resource_type.config_type)

        if _c:
            # The leading space keeps the clause from running straight into
            # the closing quote of the resource type.
            s += " AND {}".format(_c)

        return {'expr': s}

    def load_resource(self, item):
        """Turn a config item into a resource dict with normalized Tags."""
        if isinstance(item['configuration'], str):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        resource = camelResource(item_config, implicitDate=True)
        # normalized tag loading across the many variants of config's
        # inconsistencies.
        if ((item.get('tags')
             or item['supplementaryConfiguration'].get('Tags'))
                and 'Tags' not in resource):
            if item.get('tags'):
                resource['Tags'] = [{
                    u'Key': k,
                    u'Value': v
                } for k, v in item['tags'].items()]
            else:
                # config has a bit more variation on tags
                # (serialized json, list, dict, etc)
                stags = item['supplementaryConfiguration']['Tags']
                if isinstance(stags, str):
                    stags = json.loads(stags)
                if isinstance(stags, list):
                    resource['Tags'] = [{
                        u'Key': t['key'],
                        u'Value': t['value']
                    } for t in stags]
                elif isinstance(stags, dict):
                    resource['Tags'] = [{
                        u'Key': k,
                        u'Value': v
                    } for k, v in stags.items()]
        return resource

    def get_listed_resources(self, client):
        """List discovered resources, then load them in chunks of 50.

        A failed chunk is logged and its exception is then re-raised by
        ``f.result()``.
        """
        # fallback for when config decides to arbitrarily break select
        # resource for a given resource type.
        paginator = client.get_paginator('list_discovered_resources')
        paginator.PAGE_ITERATOR_CLS = RetryPageIterator
        pages = paginator.paginate(
            resourceType=self.manager.get_model().config_type)
        results = []

        with self.manager.executor_factory(max_workers=2) as w:
            ridents = pages.build_full_result()
            resource_ids = [
                r['resourceId']
                for r in ridents.get('resourceIdentifiers', ())
            ]
            self.manager.log.debug("querying %d %s resources",
                                   len(resource_ids),
                                   self.manager.__class__.__name__.lower())

            # Submit every chunk before collecting anything. The futures
            # list used to be re-created for each chunk, so the chunks were
            # never processed concurrently.
            futures = [
                w.submit(self.get_resources, resource_set)
                for resource_set in chunks(resource_ids, 50)
            ]

            # Collect in submission order so results stay in chunk order.
            for f in futures:
                if f.exception():
                    self.manager.log.error(
                        "Exception getting resources from config \n %s" %
                        (f.exception()))
                results.extend(f.result())
        return results

    def resources(self, query=None):
        """Return resources via a select query, with a listing fallback."""
        client = local_session(self.manager.session_factory).client('config')
        query = self.get_query_params(query)
        pager = Paginator(
            client.select_resource_config, {
                'input_token': 'NextToken',
                'output_token': 'NextToken',
                'result_key': 'Results'
            }, client.meta.service_model.operation_model('SelectResourceConfig'))
        pager.PAGE_ITERATOR_CLS = RetryPageIterator

        results = []
        for page in pager.paginate(Expression=query['expr']):
            results.extend(
                [self.load_resource(json.loads(r)) for r in page['Results']])

        # Config arbitrarily breaks which resource types its supports for
        # query/select on any given day, if we don't have a user defined
        # query, then fallback to iteration mode.
        if not results and query == self.get_query_params({}):
            results = self.get_listed_resources(client)
        return results

    def augment(self, resources):
        """Return ``resources`` unchanged."""
        return resources
def export(group, bucket, prefix, start, end, role, poll_period=120,
           session=None, name="", region=None):
    """export a given log group to s3

    One export task is created per day between ``start`` and ``end``. When
    the service answers ``LimitExceededException`` the call is retried
    after sleeping ``poll_period`` seconds. After each task is created the
    marker object at ``prefix`` is tagged with the exported day.

    Args:
        group: name of the log group to export.
        bucket: destination bucket name.
        prefix: optional key prefix; the group name is appended to it.
        start: datetime or date string marking the first day.
        end: datetime or date string; falls back to ``datetime.now()``.
        role: passed to ``get_session`` when no session is supplied.
        poll_period: seconds to sleep between export task attempts.
        session: optional pre-built session.
        name: label prepended to the group name in log messages.
        region: passed to ``get_session`` when no session is supplied.

    Raises:
        ValueError: when the log group cannot be found.
    """
    # Each bound is parsed on its own. The old code tested ``start`` when
    # deciding whether to parse ``end``, so a string ``end`` was never parsed.
    if start and isinstance(start, six.string_types):
        start = parse(start)
    if end and isinstance(end, six.string_types):
        end = parse(end)
    end = end or datetime.now()

    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role, region)

    client = session.client('logs')

    # Initialised up front so an empty page sequence reports "not found"
    # instead of raising NameError.
    found = False
    paginator = client.get_paginator('describe_log_groups')
    for p in paginator.paginate():
        for _group in p['logGroups']:
            if _group['logGroupName'] == group:
                group = _group
                found = True
                break
        if found:
            break

    if not found:
        raise ValueError("Log group %s not found." % group)

    if prefix:
        prefix = "%s/%s" % (prefix.rstrip('/'),
                            group['logGroupName'].strip('/'))
    else:
        prefix = group['logGroupName']

    named_group = "%s:%s" % (name, group['logGroupName'])
    log.info(
        "Log exporting group:%s start:%s end:%s bucket:%s prefix:%s size:%s",
        named_group,
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix,
        group['storedBytes'])

    days = [(
        start + timedelta(i)).replace(
            minute=0, hour=0, second=0, microsecond=0)
        for i in range((end - start).days)]
    day_count = len(days)
    s3 = boto3.Session().client('s3')
    days = filter_extant_exports(s3, bucket, prefix, days, start, end)

    log.info("Group:%s filtering s3 extant keys from %d to %d start:%s end:%s",
             named_group, day_count, len(days),
             days[0] if days else '', days[-1] if days else '')

    # Timer for the final summary. It is kept separate from the per-day
    # timer, which used to overwrite it.
    export_started = time.time()

    retry = get_retry(('SlowDown',))

    for d in days:
        date = d.replace(minute=0, microsecond=0, hour=0)
        export_prefix = "%s%s" % (prefix, date.strftime("/%Y/%m/%d"))
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   date.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(
                date.replace(
                    minute=0, microsecond=0, hour=0).timetuple()) * 1000),
            'to': int(time.mktime(
                date.replace(
                    minute=59, hour=23, microsecond=0).timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        # Make sure the marker object that receives the LastExport tag exists.
        try:
            s3.head_object(Bucket=bucket, Key=prefix)
        except ClientError as e:
            if e.response['Error']['Code'] != '404':  # Not Found
                raise
            s3.put_object(
                Bucket=bucket,
                Key=prefix,
                Body=json.dumps({}),
                ACL="bucket-owner-full-control",
                ServerSideEncryption="AES256")

        day_started = time.time()
        counter = 0
        while True:
            counter += 1
            try:
                result = client.create_export_task(**params)
            except ClientError as e:
                if e.response['Error']['Code'] == 'LimitExceededException':
                    time.sleep(poll_period)
                    # log every sixth poll while waiting on the export
                    if counter % 6 == 0:
                        log.debug(
                            "group:%s day:%s waiting for %0.2f minutes",
                            named_group, d.strftime('%Y-%m-%d'),
                            (counter * poll_period) / 60.0)
                    continue
                raise
            retry(
                s3.put_object_tagging,
                Bucket=bucket, Key=prefix,
                Tagging={
                    'TagSet': [{
                        'Key': 'LastExport',
                        'Value': d.isoformat()}]})
            break

        log.info(
            "Log export time:%0.2f group:%s day:%s bucket:%s prefix:%s task:%s",
            time.time() - day_started,
            named_group,
            d.strftime("%Y-%m-%d"),
            bucket,
            params['destinationPrefix'],
            result['taskId'])

    log.info(
        ("Exported log group:%s time:%0.2f days:%d start:%s"
         " end:%s bucket:%s prefix:%s"),
        named_group,
        time.time() - export_started,
        len(days),
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)
# limitations under the License. """ S3 Key Encrypt on Bucket Changes """ from __future__ import absolute_import, division, print_function, unicode_literals import json import boto3 from botocore.exceptions import ClientError from c7n.resources.s3 import EncryptExtantKeys from c7n.utils import get_retry s3 = config = None retry = get_retry(['404', '503'], max_attempts=4, min_delay=2) def init(): global s3, config if s3 is not None: return s3 = boto3.client('s3') with open('config.json') as fh: config = json.load(fh) # multipart copy can on multigb file can take a long time config['large'] = False def process_key_event(event, context):
def process(self, resources):
    """Delete the given file systems after unmounting them.

    Each delete call is retried while the error code is 'FileSystemInUse'.
    """
    session = local_session(self.manager.session_factory)
    efs_client = session.client('efs')
    self.unmount_filesystems(resources)
    delete_with_retry = get_retry(('FileSystemInUse',), 12)
    for fs in resources:
        fs_id = fs['FileSystemId']
        delete_with_retry(efs_client.delete_file_system, FileSystemId=fs_id)
{'input_token': 'NextToken', 'output_token': 'NextToken', 'result_key': 'Protections'}, client.meta.service_model.operation_model('ListProtections')) def get_type_protections(client, model): pager = get_protections_paginator(client) pager.PAGE_ITERATOR_CLS = RetryPageIterator try: protections = pager.paginate().build_full_result().get('Protections', []) except client.exceptions.ResourceNotFoundException: # shield is not enabled in the account, so all resources are not protected return [] return [p for p in protections if model.type in p['ResourceArn']] ShieldRetry = get_retry(('ThrottlingException',)) class IsShieldProtected(Filter): permissions = ('shield:ListProtections',) schema = type_schema('shield-enabled', state={'type': 'boolean'}) def process(self, resources, event=None): client = local_session(self.manager.session_factory).client( 'shield', region_name='us-east-1') protections = get_type_protections(client, self.manager.get_model()) protected_resources = {p['ResourceArn'] for p in protections} state = self.data.get('state', False)
class MetricsOutput(object):
    """Send metrics data to cloudwatch
    """

    permissions = ("cloudWatch:PutMetricData",)

    retry = staticmethod(get_retry(('Throttling',)))

    # Max number of datapoints buffered / sent in a single request.
    BUFFER_SIZE = 20

    @staticmethod
    def select(metrics_selector):
        """Return the metrics output class matching ``metrics_selector``.

        A falsy selector yields ``NullMetricsOutput``; a boolean ``True``
        is treated as ``'aws'``. Otherwise the first registered output
        whose key starts with the selector is returned.

        Raises:
            ValueError: if no registered output matches the selector.
        """
        if not metrics_selector:
            return NullMetricsOutput
        # Compatibility for boolean configuration
        if isinstance(metrics_selector, bool):
            metrics_selector = 'aws'
        for k in metrics_outputs.keys():
            if k.startswith(metrics_selector):
                return metrics_outputs[k]
        raise ValueError("invalid metrics option %r" % metrics_selector)

    def __init__(self, ctx, namespace=DEFAULT_NAMESPACE):
        self.ctx = ctx
        self.namespace = namespace
        self.buf = []

    def get_timestamp(self):
        """
        Now, if C7N_METRICS_TZ is set to TRUE, UTC timestamp will be used.
        For backwards compatibility, if it is not set, UTC will be the default.
        To disable this and use the system's time zone, C7N_METRICS_TZ should
        be set to FALSE.
        """
        if os.getenv("C7N_METRICS_TZ", 'TRUE').upper() in ('TRUE', ''):
            return datetime.datetime.utcnow()
        else:
            return datetime.datetime.now()

    def flush(self):
        """Send any buffered datapoints and empty the buffer."""
        if self.buf:
            self._put_metrics(self.namespace, self.buf)
            self.buf = []

    def put_metric(self, key, value, unit, buffer=True, **dimensions):
        """Record one datapoint.

        With ``buffer`` true the point is queued and the queue is flushed
        once it holds ``BUFFER_SIZE`` points; otherwise the queue
        (including this point) is flushed immediately.
        """
        point = self._format_metric(key, value, unit, dimensions)
        self.buf.append(point)
        # ">=" rather than "==" so the buffer still drains if it ever
        # grows past the limit.
        if not buffer or len(self.buf) >= self.BUFFER_SIZE:
            self.flush()

    def _format_metric(self, key, value, unit, dimensions):
        """Build the datapoint dict, tagging it with policy dimensions."""
        d = {
            "MetricName": key,
            "Timestamp": self.get_timestamp(),
            "Value": value,
            "Unit": unit}
        d["Dimensions"] = [
            {"Name": "Policy", "Value": self.ctx.policy.name},
            {"Name": "ResType", "Value": self.ctx.policy.resource_type}]
        for k, v in dimensions.items():
            d['Dimensions'].append({"Name": k, "Value": v})
        return d

    def _put_metrics(self, ns, metrics):
        """Send ``metrics`` to namespace ``ns`` in ``BUFFER_SIZE`` chunks.

        Returns the response of the last ``put_metric_data`` call, or
        ``None`` when ``metrics`` is empty.
        """
        watch = local_session(self.ctx.session_factory).client('cloudwatch')
        response = None
        # Send every chunk, and only that chunk: previously the first
        # iteration returned after sending the entire list at once.
        for metric_values in chunks(metrics, self.BUFFER_SIZE):
            response = self.retry(
                watch.put_metric_data,
                Namespace=ns,
                MetricData=metric_values)
        return response
def process_image(self, image):
    """Deregister ``image`` (by its ``ImageId``) through the ec2 client.

    The call is wrapped with a ``get_retry`` helper keyed on the
    request-limit error codes.
    """
    throttle_codes = ('RequestLimitExceeded', 'Client.RequestLimitExceeded')
    with_retry = get_retry(throttle_codes)
    session = local_session(self.manager.session_factory)
    ec2 = session.client('ec2')
    with_retry(ec2.deregister_image, ImageId=image['ImageId'])
def test_retry_passthrough(self):
    """A retry wrapper built with no error codes returns the call's result."""
    passthrough = utils.get_retry((), 5)
    outcome = passthrough(lambda: 42)
    self.assertEqual(outcome, 42)
class MetricsOutput(object):
    """Send metrics data to cloudwatch
    """

    permissions = ("cloudWatch:PutMetricData",)

    retry = staticmethod(get_retry(('Throttling',)))

    @staticmethod
    def select(metrics_enabled):
        """Pick the real output when metrics are enabled, else the null one."""
        return MetricsOutput if metrics_enabled else NullMetricsOutput

    def __init__(self, ctx, namespace=DEFAULT_NAMESPACE):
        self.ctx = ctx
        self.namespace = namespace
        self.buf = []

    def get_timestamp(self):
        """Return the timestamp to attach to a datapoint.

        UTC is used when C7N_METRICS_TZ is unset, empty or TRUE (any
        case); any other value selects the system's local time.
        """
        use_utc = os.getenv("C7N_METRICS_TZ", '').upper() in ('TRUE', '')
        if use_utc:
            return datetime.datetime.utcnow()
        return datetime.datetime.now()

    def flush(self):
        """Send buffered datapoints, if any, and reset the buffer."""
        if not self.buf:
            return
        self._put_metrics(self.namespace, self.buf)
        self.buf = []

    def put_metric(self, key, value, unit, buffer=False, **dimensions):
        """Record one datapoint, either buffered or sent immediately."""
        point = {
            "MetricName": key,
            "Timestamp": self.get_timestamp(),
            "Value": value,
            "Unit": unit}
        policy = self.ctx.policy
        point["Dimensions"] = [
            {"Name": "Policy", "Value": policy.name},
            {"Name": "ResType", "Value": policy.resource_type}]
        point['Dimensions'].extend(
            {"Name": name, "Value": val} for name, val in dimensions.items())

        if not buffer:
            self._put_metrics(self.namespace, [point])
            return

        self.buf.append(point)
        # Max metrics in a single request
        if len(self.buf) == 20:
            self.flush()

    def _put_metrics(self, ns, metrics):
        """Put ``metrics`` into namespace ``ns`` via the retry wrapper."""
        session = local_session(self.ctx.session_factory)
        cloudwatch = session.client('cloudwatch')
        return self.retry(
            cloudwatch.put_metric_data, Namespace=ns, MetricData=metrics)
class MismatchS3Origin(Filter):
    """Check for existence of S3 bucket referenced by Cloudfront,
       and verify whether owner is different from Cloudfront account owner.

    :example:

    .. code-block:: yaml

            policies:
              - name: mismatch-s3-origin
                resource: distribution
                filters:
                  - type: mismatch-s3-origin
                    check_custom_origins: true
    """

    s3_prefix = re.compile(r'.*(?=\.s3(-.*)?\.amazonaws.com)')
    s3_suffix = re.compile(r'^([^.]+\.)?s3(-.*)?\.amazonaws.com')

    schema = type_schema(
        'mismatch-s3-origin',
        check_custom_origins={'type': 'boolean'})

    # NOTE(review): list_buckets is normally authorized by the
    # s3:ListAllMyBuckets IAM action -- confirm this permission name.
    permissions = ('s3:ListBuckets',)
    retry = staticmethod(get_retry(('Throttling',)))

    def is_s3_domain(self, x):
        """Return the bucket name referenced by origin ``x``, or None.

        If the domain has a ``<bucket>.s3...amazonaws.com`` shape the part
        before ``.s3`` is returned; if the domain itself is an S3 endpoint
        the ``OriginPath`` (minus one leading slash) is returned.
        """
        bucket_match = self.s3_prefix.match(x['DomainName'])
        if bucket_match:
            return bucket_match.group()

        domain_match = self.s3_suffix.match(x['DomainName'])
        if domain_match:
            value = x['OriginPath']
            if value.startswith('/'):
                value = value.replace("/", "", 1)
            return value

        return None

    def process(self, resources, event=None):
        """Return distributions with an origin bucket missing from the account.

        Every resource gets a ``c7n:mismatched-s3-origin`` list holding the
        bucket names not found by ``list_buckets``; a resource is returned
        (once) when that list is non-empty.
        """
        results = []

        s3_client = local_session(self.manager.session_factory).client(
            's3', region_name=self.manager.config.region)
        buckets = {b['Name'] for b in s3_client.list_buckets()['Buckets']}

        for r in resources:
            mismatched = r['c7n:mismatched-s3-origin'] = []
            for x in r['Origins']['Items']:
                # Reset per origin: otherwise an origin matching neither
                # branch reuses the previous origin's bucket (or raises
                # UnboundLocalError on the very first origin).
                target_bucket = None
                if 'S3OriginConfig' in x:
                    bucket_match = self.s3_prefix.match(x['DomainName'])
                    if bucket_match:
                        target_bucket = bucket_match.group()
                elif 'CustomOriginConfig' in x and self.data.get(
                        'check_custom_origins'):
                    target_bucket = self.is_s3_domain(x)

                if target_bucket is not None and target_bucket not in buckets:
                    self.log.debug(
                        "Bucket %s not found in distribution %s hosting account."
                        % (target_bucket, r['Id']))
                    mismatched.append(target_bucket)

            # Report each distribution once, however many origins mismatch.
            if mismatched:
                results.append(r)

        return results
def process(self, volumes):
    """Run ``process_volume`` for every volume id, via a retry wrapper.

    The wrapper is built by ``get_retry`` for the ``Throttled`` error
    code with ``max_attempts=5``.
    """
    ec2 = local_session(self.manager.session_factory).client('ec2')
    with_retry = get_retry(['Throttled'], max_attempts=5)
    for volume_id in (volume['VolumeId'] for volume in volumes):
        with_retry(self.process_volume, client=ec2, volume=volume_id)
'bucket': { 'type': 'string' }, 'regions': { 'type': 'array', 'items': { 'type': 'string' } } } } } } } retry = get_retry(('Throttling', ), log_retries=True) indexers = PluginRegistry('policy-metrics-indexers') class Indexer(object): """ Metrics indexer """ def get_indexer(config, **kwargs): itype = config['indexer']['type'] klass = indexers.get(itype) return klass(config, **kwargs)
def process(self, volumes):
    """Invoke ``process_volume`` once per volume through a retry wrapper.

    A single ec2 client is shared by all calls; the wrapper comes from
    ``get_retry`` (``Throttled`` code, ``max_attempts=5``).
    """
    session = local_session(self.manager.session_factory)
    ec2_client = session.client('ec2')
    throttled_retry = get_retry(['Throttled'], max_attempts=5)
    for entry in volumes:
        throttled_retry(
            self.process_volume,
            client=ec2_client,
            volume=entry['VolumeId'])
class EMRCluster(QueryResourceManager):
    """Resource manager for Elastic MapReduce clusters
    """

    class resource_type(TypeInfo):
        service = 'emr'
        arn_type = 'emr'
        cluster_states = ['WAITING', 'BOOTSTRAPPING', 'RUNNING', 'STARTING']
        enum_spec = ('list_clusters', 'Clusters',
                     {'ClusterStates': cluster_states})
        name = 'Name'
        id = 'Id'
        date = "Status.Timeline.CreationDateTime"

    action_registry = actions
    filter_registry = filters
    retry = staticmethod(get_retry(('ThrottlingException',)))

    def __init__(self, ctx, data):
        super(EMRCluster, self).__init__(ctx, data)
        self.queries = QueryFilter.parse(
            self.data.get(
                'query',
                [{'ClusterStates': ['running', 'bootstrapping', 'waiting']}]))

    @classmethod
    def get_permissions(cls):
        return ("elasticmapreduce:ListClusters",
                "elasticmapreduce:DescribeCluster")

    def get_resources(self, ids):
        """Describe each cluster id individually and return the clusters."""
        # no filtering by id set supported at the api
        client = local_session(self.session_factory).client('emr')
        # Use the same retry wrapper as augment() so throttling on
        # describe_cluster is handled consistently.
        return [
            self.retry(client.describe_cluster, ClusterId=jid)['Cluster']
            for jid in ids]

    def resources(self, query=None):
        """Fetch resources, folding the policy's query filters into ``query``."""
        q = self.consolidate_query_filter()
        if q is not None:
            query = query or {}
            for query_filter in q:
                query[query_filter['Name']] = query_filter['Values']
        return super(EMRCluster, self).resources(query=query)

    def consolidate_query_filter(self):
        """Merge the parsed query filters into one entry per filter name.

        Returns a list of ``{'Name': ..., 'Values': [...]}`` dicts in
        first-seen order, with a default ``ClusterStates`` entry appended
        when the policy did not specify one.
        """
        result = []
        by_name = {}
        # allow same name to be specified multiple times and append the queries
        # under the same name
        for q in self.queries:
            query_filter = q.query()
            name = query_filter['Name']
            if name in by_name:
                by_name[name]['Values'].extend(query_filter['Values'])
            else:
                by_name[name] = query_filter
                result.append(query_filter)
        if 'ClusterStates' not in by_name:
            # include default query
            result.append({
                'Name': 'ClusterStates',
                'Values': ['WAITING', 'RUNNING', 'BOOTSTRAPPING'],
            })
        return result

    def augment(self, resources):
        """Replace each listed cluster with its ``describe_cluster`` detail."""
        client = local_session(
            self.get_resource_manager('emr').session_factory).client('emr')
        # remap for cwmetrics
        return [
            self.retry(client.describe_cluster, ClusterId=r['Id'])['Cluster']
            for r in resources]
class SetShieldProtection(BaseAction):
    """Enable shield protection on applicable resource.

    setting `sync` parameter will also clear out stale shield protections
    for resources that no longer exist.
    """

    # DeleteProtection is listed because both process() (state: false)
    # and clear_stale() call delete_protection.
    permissions = (
        'shield:CreateProtection',
        'shield:DeleteProtection',
        'shield:ListProtections',
    )
    schema = type_schema(
        'set-shield',
        state={'type': 'boolean'},
        sync={'type': 'boolean'})

    retry = staticmethod(get_retry(('ThrottlingException',)))

    def process(self, resources):
        """Create or delete shield protections so resources match ``state``.

        With ``state`` true (the default) unprotected resources get a
        protection; with ``state`` false existing protections are deleted.
        Resources already in the requested state are left alone.
        """
        client = local_session(self.manager.session_factory).client(
            'shield', region_name='us-east-1')
        model = self.manager.get_model()
        protections = get_type_protections(client, model)
        protected_resources = {p['ResourceArn']: p for p in protections}
        state = self.data.get('state', True)

        if self.data.get('sync', False):
            self.clear_stale(client, protections)

        for r in resources:
            arn = self.manager.get_arn(r)
            protected = arn in protected_resources
            if not state:
                # Disabling: only delete what exists. Previously an
                # unprotected resource fell through to create_protection.
                if protected:
                    self.retry(
                        client.delete_protection,
                        ProtectionId=protected_resources[arn]['Id'])
                continue
            if protected:
                continue
            try:
                self.retry(
                    client.create_protection,
                    Name=r[model.name],
                    ResourceArn=arn)
            except ClientError as e:
                if e.response['Error']['Code'] == 'ResourceAlreadyExistsException':
                    continue
                raise

    def clear_stale(self, client, protections):
        """Delete protections whose resource arn no longer exists."""
        # Get all resources unfiltered
        resources = self.manager.get_resource_manager(
            self.manager.type).resources()
        resource_arns = set(map(self.manager.get_arn, resources))

        pmap = {}
        # Only process stale resources in region for non global resources.
        global_resource = getattr(
            self.manager.resource_type, 'global_resource', False)
        for p in protections:
            if not global_resource and self.manager.region not in p['ResourceArn']:
                continue
            pmap[p['ResourceArn']] = p

        # Find any protections for resources that don't exist
        stale = set(pmap).difference(resource_arns)
        self.log.info("clearing %d stale protections", len(stale))
        for s in stale:
            self.retry(
                client.delete_protection, ProtectionId=pmap[s]['Id'])
class QueryResourceManager(ResourceManager):
    """Resource manager that loads resources through a pluggable source."""

    resource_type = ""

    # TODO Check if we can move to describe source
    max_workers = 3
    chunk_size = 20

    permissions = ()

    _generate_arn = None

    # 'Throttling' replaces the misspelt 'ThorttlingException', which could
    # never match an error code.
    retry = staticmethod(
        get_retry((
            'ThrottlingException',
            'RequestLimitExceeded',
            'Throttled',
            'Throttling',
            'Client.RequestLimitExceeded')))

    def __init__(self, data, options):
        super(QueryResourceManager, self).__init__(data, options)
        self.source = self.get_source(self.source_type)

    @property
    def source_type(self):
        """Name of the configured source; defaults to ``'describe'``."""
        return self.data.get('source', 'describe')

    def get_source(self, source_type):
        """Instantiate the registered source class for ``source_type``."""
        return sources.get(source_type)(self)

    @classmethod
    def get_model(cls):
        return ResourceQuery.resolve(cls.resource_type)

    @classmethod
    def match_ids(cls, ids):
        """return ids that match this resource type's id format."""
        id_prefix = getattr(cls.get_model(), 'id_prefix', None)
        if id_prefix is not None:
            return [i for i in ids if i.startswith(id_prefix)]
        return ids

    def get_permissions(self):
        """Return the source's permissions plus this manager's own."""
        # Copy so the sequence returned by the source is never mutated.
        perms = list(self.source.get_permissions())
        if getattr(self, 'permissions', None):
            perms.extend(self.permissions)
        return perms

    def get_cache_key(self, query):
        return {
            'account': self.account_id,
            'region': self.config.region,
            'resource': str(self.__class__.__name__),
            'q': query
        }

    def resources(self, query=None):
        """Return filtered resources, served from the cache when possible."""
        key = self.get_cache_key(query)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug("Using cached %s: %d" % (
                    "%s.%s" % (self.__class__.__module__,
                               self.__class__.__name__),
                    len(resources)))
                return self.filter_resources(resources)

        if query is None:
            query = {}

        resources = self.augment(self.source.resources(query))
        self._cache.save(key, resources)
        return self.filter_resources(resources)

    def _get_cached_resources(self, ids):
        """Return cached resources whose id is in ``ids``, or None on a miss."""
        key = self.get_cache_key(None)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug("Using cached results for get_resources")
                m = self.get_model()
                id_set = set(ids)
                return [r for r in resources if r[m.id] in id_set]
        return None

    def get_resources(self, ids, cache=True, augment=True):
        """Resolve ``ids`` to resources.

        The cache is consulted first when ``cache`` is true. A
        ``ClientError`` from the source is logged and yields ``[]``.
        """
        if cache:
            resources = self._get_cached_resources(ids)
            if resources is not None:
                return resources
        try:
            resources = self.source.get_resources(ids)
            if augment:
                resources = self.augment(resources)
            return resources
        except ClientError as e:
            self.log.warning("event ids not resolved: %s error:%s" % (ids, e))
            return []

    def augment(self, resources):
        """subclasses may want to augment resources with additional information.

        ie. we want tags by default (rds, elb), and policy, location, acl for
        s3 buckets.
        """
        return self.source.augment(resources)

    @property
    def account_id(self):
        """ Return the current account ID.

        This should now be passed in using the --account-id flag, but for a
        period of time we will support the old behavior of inferring this from
        IAM.
        """
        return self.config.account_id

    def get_arns(self, resources):
        """Return an arn per resource, generating one when the id is not an arn."""
        arns = []
        id_key = self.get_model().id
        for r in resources:
            _id = r[id_key]
            if 'arn' in _id[:3]:
                arns.append(_id)
            else:
                arns.append(self.generate_arn(_id))
        return arns

    @property
    def generate_arn(self):
        """ Generates generic arn if ID is not already arn format.
        """
        if self._generate_arn is None:
            self._generate_arn = functools.partial(
                generate_arn,
                self.get_model().service,
                region=self.config.region,
                account_id=self.account_id,
                resource_type=self.get_model().type,
                separator='/')
        return self._generate_arn
def process(self, volumes):
    """Call ``create_snapshot`` for every volume through a retry wrapper.

    The wrapper is built by ``get_retry`` for the ``Throttled`` error
    code with ``max_attempts=5``.
    """
    session = local_session(self.manager.session_factory)
    ec2 = session.client('ec2')
    snapshot_with_retry = get_retry(['Throttled'], max_attempts=5)
    for volume in volumes:
        snapshot_with_retry(ec2.create_snapshot, VolumeId=volume['VolumeId'])
class ConfigSource(object):
    """Resource source backed by the AWS Config service."""

    retry = staticmethod(get_retry(('ThrottlingException',)))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        return ["config:GetResourceConfigHistory",
                "config:ListDiscoveredResources"]

    def get_resources(self, ids, cache=True):
        """Load the latest recorded configuration for each id.

        Ids with no configuration history are skipped. Returns a list.
        """
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(
                client.get_resource_config_history,
                resourceId=i,
                resourceType=m.config_type,
                limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        # Materialize: a bare filter() is a one-shot iterator on Python 3.
        return list(filter(None, results))

    def load_resource(self, item):
        """Convert a configuration item into a resource dict."""
        if isinstance(item['configuration'], six.string_types):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        return camelResource(item_config)

    def resources(self, query=None):
        """List discovered resources of this type and load each one.

        Ids are fetched in chunks of 50 on the manager's executor.
        """
        client = local_session(self.manager.session_factory).client('config')
        paginator = client.get_paginator('list_discovered_resources')
        pages = paginator.paginate(
            resourceType=self.manager.get_model().config_type)
        results = []

        with self.manager.executor_factory(max_workers=5) as w:
            ridents = pager(pages, self.retry)
            resource_ids = [
                r['resourceId']
                for r in ridents.get('resourceIdentifiers', ())]
            self.manager.log.debug(
                "querying %d %s resources",
                len(resource_ids),
                self.manager.__class__.__name__.lower())

            # Keep every submitted future. The list used to be re-created
            # per chunk, so only the last chunk was ever collected (and it
            # was unbound when there were no ids).
            futures = [
                w.submit(self.get_resources, resource_set)
                for resource_set in chunks(resource_ids, 50)]

            for f in as_completed(futures):
                if f.exception():
                    self.manager.log.error(
                        "Exception getting resources from config \n %s" % (
                            f.exception()))
                # f.result() re-raises the worker's exception, if any.
                results.extend(f.result())
        return results

    def augment(self, resources):
        return resources
class ConfigSource(object):
    """Resource source backed by the AWS Config service."""

    retry = staticmethod(get_retry(('ThrottlingException',)))

    def __init__(self, manager):
        self.manager = manager

    def get_permissions(self):
        return ["config:GetResourceConfigHistory",
                "config:ListDiscoveredResources"]

    def get_resources(self, ids, cache=True):
        """Load the latest recorded configuration for each id.

        Ids with no configuration history are skipped. Returns a list.
        """
        client = local_session(self.manager.session_factory).client('config')
        results = []
        m = self.manager.get_model()
        for i in ids:
            revisions = self.retry(
                client.get_resource_config_history,
                resourceId=i,
                resourceType=m.config_type,
                limit=1).get('configurationItems')
            if not revisions:
                continue
            results.append(self.load_resource(revisions[0]))
        return list(filter(None, results))

    def get_query_params(self, query):
        """Parse config select expression from policy and parameter.

        On policy config supports a full statement being given, or
        a clause that will be added to the where expression.

        If no query is specified, a default query is utilized.

        A valid query should at minimum select fields
        for configuration, supplementaryConfiguration and
        must have resourceType qualifier.

        Returns a dict; generated queries carry the statement under
        ``'expr'``.

        Raises:
            PolicyExecutionError: if ``query`` is truthy but not a dict.
        """
        if query and not isinstance(query, dict):
            raise PolicyExecutionError("invalid config source query %s" % (query,))

        # A full expression from the policy takes precedence.
        if query is None and 'query' in self.manager.data:
            _q = [q for q in self.manager.data['query'] if 'expr' in q]
            if _q:
                query = _q.pop()

        if query is None and 'query' in self.manager.data:
            _c = [q['clause'] for q in self.manager.data['query'] if 'clause' in q]
            if _c:
                _c = _c.pop()
        elif query:
            return query
        else:
            _c = None

        s = "select configuration, supplementaryConfiguration where resourceType = '{}'".format(
            self.manager.resource_type.config_type)

        if _c:
            # Leading space keeps AND from fusing with the quoted type.
            s += " AND {}".format(_c)

        return {'expr': s}

    def load_resource(self, item):
        """Convert a configuration item into a resource dict."""
        if isinstance(item['configuration'], six.string_types):
            item_config = json.loads(item['configuration'])
        else:
            item_config = item['configuration']
        return camelResource(item_config)

    def resources(self, query=None):
        """Run the select expression and load every result row."""
        client = local_session(self.manager.session_factory).client('config')
        query = self.get_query_params(query)
        pager = Paginator(
            client.select_resource_config,
            {'input_token': 'NextToken', 'output_token': 'NextToken',
             'result_key': 'Results'},
            client.meta.service_model.operation_model('SelectResourceConfig'))
        pager.PAGE_ITERATOR_CLS = RetryPageIterator

        results = []
        for page in pager.paginate(Expression=query['expr']):
            results.extend([
                self.load_resource(json.loads(r)) for r in page['Results']])
        return results

    def augment(self, resources):
        return resources
class QueryResourceManager(ResourceManager):
    """Resource manager that loads resources through a pluggable source."""

    resource_type = ""

    # TODO Check if we can move to describe source
    max_workers = 3
    chunk_size = 20

    permissions = ()

    _generate_arn = None

    retry = staticmethod(
        get_retry((
            'ThrottlingException',
            'RequestLimitExceeded',
            'Throttled',
            'Throttling',
            'Client.RequestLimitExceeded')))

    def __init__(self, data, options):
        super(QueryResourceManager, self).__init__(data, options)
        self.source = self.get_source(self.source_type)

    @property
    def source_type(self):
        """Name of the configured source; defaults to ``'describe'``."""
        return self.data.get('source', 'describe')

    def get_source(self, source_type):
        """Instantiate the registered source class for ``source_type``."""
        return sources.get(source_type)(self)

    @classmethod
    def has_arn(cls):
        """Whether arns can be produced for this resource type."""
        if getattr(cls.resource_type, 'arn', None):
            return True
        elif getattr(cls.resource_type, 'type', None) is not None:
            return True
        elif cls.__dict__.get('get_arns'):
            return True
        return False

    @classmethod
    def get_model(cls):
        return ResourceQuery.resolve(cls.resource_type)

    @classmethod
    def match_ids(cls, ids):
        """return ids that match this resource type's id format."""
        id_prefix = getattr(cls.get_model(), 'id_prefix', None)
        if id_prefix is not None:
            return [i for i in ids if i.startswith(id_prefix)]
        return ids

    def get_permissions(self):
        """Return the source's permissions plus this manager's own."""
        # Copy so the sequence returned by the source is never mutated.
        perms = list(self.source.get_permissions())
        if getattr(self, 'permissions', None):
            perms.extend(self.permissions)
        return perms

    def get_cache_key(self, query):
        return {
            'account': self.account_id,
            'region': self.config.region,
            'resource': str(self.__class__.__name__),
            'source': self.source_type,
            'q': query
        }

    def resources(self, query=None):
        """Return filtered resources, served from the cache when possible.

        Fetch, augment and filter each run inside their own tracer
        subsegment. When this manager's data is the executing policy's
        data, the resource limit check runs on the result.
        """
        query = self.source.get_query_params(query)
        cache_key = self.get_cache_key(query)
        resources = None

        if self._cache.load():
            resources = self._cache.get(cache_key)
            if resources is not None:
                self.log.debug("Using cached %s: %d" % (
                    "%s.%s" % (self.__class__.__module__,
                               self.__class__.__name__),
                    len(resources)))

        if resources is None:
            if query is None:
                query = {}
            with self.ctx.tracer.subsegment('resource-fetch'):
                resources = self.source.resources(query)
            with self.ctx.tracer.subsegment('resource-augment'):
                resources = self.augment(resources)
            self._cache.save(cache_key, resources)

        resource_count = len(resources)
        with self.ctx.tracer.subsegment('filter'):
            resources = self.filter_resources(resources)

        # Check if we're out of a policies execution limits.
        if self.data == self.ctx.policy.data:
            self.check_resource_limit(len(resources), resource_count)
        return resources

    def check_resource_limit(self, selection_count, population_count):
        """Check if policy's execution affects more resources then its limit.

        Ideally this would be at a higher level but we've hidden
        filtering behind the resource manager facade for default usage.
        """
        p = self.ctx.policy
        max_resource_limits = MaxResourceLimit(
            p, selection_count, population_count)
        return max_resource_limits.check_resource_limits()

    def _get_cached_resources(self, ids):
        """Return cached resources whose id is in ``ids``, or None on a miss."""
        key = self.get_cache_key(None)
        if self._cache.load():
            resources = self._cache.get(key)
            if resources is not None:
                self.log.debug("Using cached results for get_resources")
                m = self.get_model()
                id_set = set(ids)
                return [r for r in resources if r[m.id] in id_set]
        return None

    def get_resources(self, ids, cache=True, augment=True):
        """Resolve ``ids`` to resources.

        The cache is consulted first when ``cache`` is true. A
        ``ClientError`` from the source is logged and yields ``[]``.
        """
        if cache:
            resources = self._get_cached_resources(ids)
            if resources is not None:
                return resources
        try:
            resources = self.source.get_resources(ids)
            if augment:
                resources = self.augment(resources)
            return resources
        except ClientError as e:
            self.log.warning("event ids not resolved: %s error:%s" % (ids, e))
            return []

    def augment(self, resources):
        """subclasses may want to augment resources with additional information.

        ie. we want tags by default (rds, elb), and policy, location, acl for
        s3 buckets.
        """
        return self.source.augment(resources)

    @property
    def account_id(self):
        """ Return the current account ID.

        This should now be passed in using the --account-id flag, but for a
        period of time we will support the old behavior of inferring this from
        IAM.
        """
        return self.config.account_id

    @property
    def region(self):
        """ Return the current region.
        """
        return self.config.region

    def get_arns(self, resources):
        """Return an arn for each resource.

        Uses the model's ``arn`` key when set, the id itself when it
        already starts with ``arn``, and ``generate_arn`` otherwise.

        Raises:
            ValueError: if the model declares ``arn = False``.
        """
        arns = []

        m = self.get_model()
        arn_key = getattr(m, 'arn', None)
        if arn_key is False:
            raise ValueError("%s do not have arns" % self.type)

        id_key = m.id

        for r in resources:
            _id = r[id_key]
            if arn_key:
                arns.append(r[arn_key])
            elif 'arn' in _id[:3]:
                arns.append(_id)
            else:
                arns.append(self.generate_arn(_id))
        return arns

    @property
    def generate_arn(self):
        """ Generates generic arn if ID is not already arn format.
        """
        if self._generate_arn is None:
            self._generate_arn = functools.partial(
                generate_arn,
                self.get_model().service,
                region=self.config.region,
                account_id=self.config.account_id,
                resource_type=self.get_model().type,
                separator='/')
        return self._generate_arn
def process_image(self, image):
    """Issue ``deregister_image`` for ``image['ImageId']`` with retries.

    The retry wrapper comes from ``get_retry`` and is keyed on the two
    request-limit error codes.
    """
    deregister_with_retry = get_retry(
        ('RequestLimitExceeded', 'Client.RequestLimitExceeded'))
    ec2_client = local_session(
        self.manager.session_factory).client('ec2')
    image_id = image['ImageId']
    deregister_with_retry(ec2_client.deregister_image, ImageId=image_id)
def export(group, bucket, prefix, start, end, role, session=None):
    """Export a single log group to s3.

    One export task is created per day between ``start`` and ``end``,
    skipping days that ``filter_extant_exports`` reports as already
    present.

    Args:
        group: log group dict; ``group['logGroupName']`` is read.
        bucket: destination bucket name.
        prefix: optional key prefix; the log group name is appended to it,
            or used on its own when no prefix is given.
        start: datetime, or a string handed to ``parse``.
        end: datetime, a string handed to ``parse``, or falsy for now.
        role: passed to ``get_session`` when ``session`` is None.
        session: optional pre-built session.
    """
    if start and isinstance(start, basestring):
        start = parse(start)
    # Test ``end`` itself: the old check looked at ``start`` (already
    # parsed by then), so a string ``end`` was never converted.
    if end and isinstance(end, basestring):
        end = parse(end)
    elif not end:
        end = datetime.now()

    start = start.replace(tzinfo=tzlocal()).astimezone(tzutc())
    end = end.replace(tzinfo=tzlocal()).astimezone(tzutc())

    if session is None:
        session = get_session(role)

    client = session.client('logs')
    retry = get_retry(('LimitExceededException',), min_delay=4)

    if prefix:
        prefix = "%s/%s" % (
            prefix.rstrip('/'), group['logGroupName'].strip('/'))
    else:
        # Was ``prefix = group``, which put the whole dict into the key.
        prefix = group['logGroupName']

    log.debug(
        "Log exporting group:%s start:%s end:%s bucket:%s prefix:%s",
        group,
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)

    t = time.time()
    days = [start + timedelta(i) for i in range((end - start).days)]
    day_count = len(days)
    days = filter_extant_exports(
        boto3.Session().client('s3'), bucket, prefix, days, start, end)

    log.debug("Filtering s3 extant keys from %d to %d in %0.2f",
              day_count, len(days), time.time() - t)
    t = time.time()

    for d in days:
        day_start = d.replace(minute=0, microsecond=0, hour=0)
        day_end = day_start.replace(minute=59, hour=23, microsecond=0)
        export_prefix = "%s%s" % (prefix, day_start.strftime("/%Y/%m/%d"))
        # NOTE(review): time.mktime reads the tuple as local time while the
        # datetimes above were converted to UTC -- confirm this is intended.
        params = {
            'taskName': "%s-%s" % ("c7n-log-exporter",
                                   day_start.strftime("%Y-%m-%d")),
            'logGroupName': group['logGroupName'],
            'fromTime': int(time.mktime(day_start.timetuple()) * 1000),
            'to': int(time.mktime(day_end.timetuple()) * 1000),
            'destination': bucket,
            'destinationPrefix': export_prefix
        }

        # if stream_prefix:
        #    params['logStreamPrefix'] = stream_prefix

        result = retry(client.create_export_task, **params)
        log.debug(
            "Log export group:%s day:%s bucket:%s prefix:%s task:%s",
            group,
            params['taskName'],
            bucket,
            params['destinationPrefix'],
            result['taskId'])

    # len(days) rather than the loop index: the index was unbound when
    # there was nothing to export and one short otherwise.
    log.info(
        ("Exported log group:%s time:%0.2f days:%d start:%s"
         " end:%s bucket:%s prefix:%s"),
        group,
        time.time() - t,
        len(days),
        start.strftime('%Y/%m/%d'),
        end.strftime('%Y/%m/%d'),
        bucket,
        prefix)