class Report:
    """Build the report document for a single app on a given date.

    The constructor collects every report section from the ``app``
    dictionary; the remaining methods render the collected sections as a
    YAML document together with the path it should be stored under.
    """

    def __init__(self, app, date):
        """Collect all report sections for ``app``.

        :param app: dictionary describing the app; ``name`` and ``path`` are
            read when rendering, and the optional keys ``promotions``,
            ``merge_activity``, ``container_vulnerabilities`` and
            ``post_deploy_jobs`` feed the report sections
        :param date: report date; anything exposing ``strftime`` is
            normalized to ``YYYY-MM-DD``, other values are used as given
        """
        settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=settings)
        # standard date format
        if hasattr(date, 'strftime'):
            date = date.strftime('%Y-%m-%d')

        self.app = app
        self.date = date
        self.report_sections = {}

        # valet
        # Pending https://issues.redhat.com/browse/APPSRE-1674
        # self.add_report_section('valet', self.slo_section())

        # promotions
        self.add_report_section(
            'production_promotions',
            self.get_activity_content(self.app.get('promotions')))

        # merges to master
        self.add_report_section(
            'merges_to_master',
            self.get_activity_content(self.app.get('merge_activity')))

        # Container Vulnerabilities
        self.add_report_section(
            'container_vulnerabilities',
            self.get_vulnerability_content(
                self.app.get('container_vulnerabilities')))

        # Post-deploy Jobs
        self.add_report_section(
            'post_deploy_jobs',
            self.get_post_deploy_jobs_content(
                self.app.get('post_deploy_jobs')))

    @property
    def path(self):
        """Relative path of the report file for this app and date."""
        return f"data/reports/{self.app['name']}/{self.date}.yml"

    def content(self):
        """Return the report as a dictionary.

        The collected sections are embedded under ``content`` as a YAML
        string, keeping their insertion order.
        """
        return {
            '$schema': '/app-sre/report-1.yml',
            'labels': {
                'app': self.app['name']
            },
            'name': f"{self.app['name']}-{self.date}",
            'app': {
                '$ref': self.app['path']
            },
            'date': self.date,
            'contentFormatVersion': CONTENT_FORMAT_VERSION,
            'content': yaml.safe_dump(self.report_sections, sort_keys=False)
        }

    def to_yaml(self):
        """Serialize :meth:`content` to a YAML string."""
        return yaml.safe_dump(self.content(), sort_keys=False)

    def to_message(self):
        """Return the report as a ``file_path`` / ``content`` dictionary."""
        return {'file_path': self.path, 'content': self.to_yaml()}

    def add_report_section(self, header, content):
        """Store ``content`` under ``header``; any falsy content becomes None.
        """
        self.report_sections[header] = content or None

    def slo_section(self):
        """Return the availability and latency SLO metrics of this app.

        :return: list of metric dictionaries, or None when there are none
        """
        performance_parameters = [
            pp for pp in get_performance_parameters()
            if pp['app']['path'] == self.app['path']
        ]

        metrics_availability = self.get_performance_metrics(
            performance_parameters,
            self.calculate_performance_availability,
            'availability')
        metrics_latency = self.get_performance_metrics(
            performance_parameters,
            self.calculate_performance_latency,
            'latency')

        return [*metrics_availability, *metrics_latency] or None

    @staticmethod
    def get_vulnerability_content(container_vulnerabilities):
        """Flatten ``{cluster: {namespace: severities}}`` into a list.

        :return: one dictionary per (cluster, namespace) pair; an empty list
            when the input is empty or None
        """
        if not container_vulnerabilities:
            return []

        return [
            {
                'cluster': cluster,
                'namespace': namespace,
                'vulnerabilities': severities
            }
            for cluster, namespaces in container_vulnerabilities.items()
            for namespace, severities in namespaces.items()
        ]

    @staticmethod
    def get_post_deploy_jobs_content(post_deploy_jobs):
        """Flatten ``{cluster: {namespace: job}}`` into a list.

        :return: one dictionary per (cluster, namespace) pair; an empty list
            when the input is empty or None
        """
        if not post_deploy_jobs:
            return []

        return [
            {
                'cluster': cluster,
                'namespace': namespace,
                'post_deploy_job': post_deploy_job
            }
            for cluster, namespaces in post_deploy_jobs.items()
            for namespace, post_deploy_job in namespaces.items()
        ]

    def get_performance_metrics(self, performance_parameters, method, field):
        """Run ``method`` for every SLO metric of every namespace.

        Only metrics whose ``kind`` is ``SLO`` are considered, and only for
        namespaces whose cluster has a ``prometheus`` entry.

        :param performance_parameters: performance parameter dictionaries
        :param method: callable invoked as ``method(component, ns, metric)``
        :param field: key of the metric list to read from each parameter set
        :return: list of the non-None results of ``method``
        """
        results = []
        for pp in performance_parameters:
            for ns in pp['namespaces']:
                for metric in pp.get(field, []):
                    if metric['kind'] != 'SLO':
                        continue
                    if not ns['cluster']['prometheus']:
                        continue
                    result = method(pp['component'], ns, metric)
                    # the calculate_* methods return None when a query
                    # fails; such entries carry no information for the report
                    if result is not None:
                        results.append(result)
        return results

    def _prometheus_auth(self, prom_info):
        """Build the basic-auth object for a cluster's prometheus."""
        creds = self.secret_reader.read(prom_info['auth'])
        # split on the first colon only: the password may contain colons
        return requests.auth.HTTPBasicAuth(*creds.split(':', 1))

    @staticmethod
    def _query_single_value(prom_info, query, auth):
        """Run ``query`` and return the value of its single result.

        :return: the raw value of the only result, or None (after logging an
            error) when the query did not yield exactly one result
        """
        result = promql(prom_info['url'], query, auth=auth)
        if len(result) != 1:
            logging.error("unexpected promql result:\n"
                          "url: %s\n"
                          "query: %s", prom_info['url'], query)
            return None
        return result[0]['value'][1]

    def calculate_performance_availability(self, component, ns, metric):
        """Compute request volume and availability over the last 30 days.

        :return: dictionary describing the availability metric, or None when
            one of the prometheus queries did not return a single result
        """
        metric_selectors = json.loads(metric['selectors'])
        metric_name = metric['metric']

        prom_info = ns['cluster']['prometheus']
        prom_auth = self._prometheus_auth(prom_info)

        # both queries select the same series within the namespace
        selectors = metric_selectors.copy()
        selectors['namespace'] = ns['name']
        prom_selectors = self.promqlify(selectors)

        # volume
        vol_promql_query = (f"sum(increase({metric_name}"
                            f"{{{prom_selectors}}}[30d]))")
        raw_volume = self._query_single_value(
            prom_info, vol_promql_query, prom_auth)
        if raw_volume is None:
            return None
        volume = int(float(raw_volume))

        # availability
        avail_promql_query = f"""
            sum(increase(
                {metric_name}{{{prom_selectors}, code!~"5.."}}[30d]
                ))
                /
            sum(increase(
                {metric_name}{{{prom_selectors}}}[30d]
                )) * 100
            """
        raw_availability = self._query_single_value(
            prom_info, avail_promql_query, prom_auth)
        if raw_availability is None:
            return None
        availability = float(raw_availability)

        target_slo = 100 - float(metric['errorBudget'])

        return {
            'component': component,
            'type': 'availability',
            'selectors': self.promqlify(metric_selectors),
            'total_requests': volume,
            'availability': round(availability, 2),
            'availability_slo_met': availability >= target_slo,
        }

    def calculate_performance_latency(self, component, ns, metric):
        """Compute the configured latency percentile over the last 30 days.

        :return: dictionary describing the latency metric, or None when the
            prometheus query did not return a single result
        """
        metric_selectors = json.loads(metric['selectors'])
        metric_name = metric['metric']

        selectors = metric_selectors.copy()
        selectors['namespace'] = ns['name']

        prom_info = ns['cluster']['prometheus']
        prom_auth = self._prometheus_auth(prom_info)

        # histogram_quantile expects a quantile between 0 and 1
        percentile = float(metric['percentile']) / 100

        prom_selectors = self.promqlify(selectors)
        promql_query = f"""
            histogram_quantile({percentile},
                sum by (le) (increase(
                    {metric_name}{{
                        {prom_selectors}, code!~"5.."
                    }}[30d]))
            )
            """
        raw_latency = self._query_single_value(
            prom_info, promql_query, prom_auth)
        if raw_latency is None:
            return None
        latency = float(raw_latency)

        return {
            'component': component,
            'type': 'latency',
            'selectors': self.promqlify(metric_selectors),
            'latency': round(latency, 2),
            'latency_slo_met': latency <= float(metric['threshold']),
        }

    @staticmethod
    def promqlify(selectors):
        """Render a selector dictionary as ``key="value"`` pairs."""
        return ", ".join(f'{k}="{v}"' for k, v in selectors.items())

    @staticmethod
    def get_activity_content(activity):
        """Turn ``{repo: (total, success, ...)}`` into a list of dictionaries.

        :return: one dictionary per repo; an empty list when the input is
            empty or None
        """
        if not activity:
            return []

        return [
            {
                "repo": repo,
                "total": int(results[0]),
                "success": int(results[1]),
            }
            for repo, results in activity.items()
        ]
class AWSApi:
    """Wrapper around AWS SDK"""

    def __init__(self, thread_pool_size, accounts, settings=None,
                 init_ecr_auth_tokens=False):
        """Create one session per account and list each account's users.

        :param thread_pool_size: number of threads used for parallel work
        :param accounts: app-interface account dictionaries; ``name`` and
            ``automationToken`` are read from each
        :param settings: settings handed to the secret reader
        :param init_ecr_auth_tokens: when true, also fetch ECR authorization
            tokens for every account that declares ``ecrs``
        """
        self.thread_pool_size = thread_pool_size
        self.secret_reader = SecretReader(settings=settings)
        self.init_sessions_and_resources(accounts)
        if init_ecr_auth_tokens:
            self.init_ecr_auth_tokens(accounts)
        self.init_users()
        self._lock = Lock()
        self.resource_types = \
            ['s3', 'sqs', 'dynamodb', 'rds', 'rds_snapshots']

        # store the app-interface accounts in a dictionary indexed by name
        self.accounts = {acc['name']: acc for acc in accounts}

    def init_sessions_and_resources(self, accounts):
        """Create a boto3 session and an empty resource map per account."""
        results = threaded.run(self.get_tf_secrets, accounts,
                               self.thread_pool_size)
        self.sessions = {}
        self.resources = {}
        for account, secret in results:
            session = boto3.Session(
                aws_access_key_id=secret['aws_access_key_id'],
                aws_secret_access_key=secret['aws_secret_access_key'],
                region_name=secret['region'],
            )
            self.sessions[account] = session
            self.resources[account] = {}

    def get_session(self, account):
        """Return the session of the account named ``account``."""
        return self.sessions[account]

    def get_tf_secrets(self, account):
        """Read the automation token secret of ``account``.

        :return: tuple of (account name, secret)
        """
        account_name = account['name']
        automation_token = account['automationToken']
        secret = self.secret_reader.read_all(automation_token)
        return (account_name, secret)

    def init_users(self):
        """Populate ``self.users`` with the IAM user names of every account.
        """
        self.users = {}
        for account, s in self.sessions.items():
            iam = s.client('iam')
            # list_users is paginated: a single call may not return
            # every user of the account
            users = self.paginate(iam, 'list_users', 'Users')
            self.users[account] = [u['UserName'] for u in users]

    def simulate_deleted_users(self, io_dir):
        """Remove from ``self.users`` the users listed in the io files.

        Reads ``terraform_resources.json`` and ``terraform_users.json`` from
        ``io_dir`` when they exist; each entry names an ``account`` and a
        ``user`` to drop.
        """
        src_integrations = ['terraform_resources', 'terraform_users']
        if not os.path.exists(io_dir):
            return
        for integration in src_integrations:
            file_path = os.path.join(io_dir, integration + '.json')
            if not os.path.exists(file_path):
                continue
            with open(file_path, 'r') as f:
                deleted_users = json.load(f)
            for deleted_user in deleted_users:
                delete_from_account = deleted_user['account']
                delete_user = deleted_user['user']
                self.users[delete_from_account].remove(delete_user)

    def map_resources(self):
        """Map every resource type in ``self.resource_types`` in parallel."""
        threaded.run(self.map_resource, self.resource_types,
                     self.thread_pool_size)

    def map_resource(self, resource_type):
        """Map a single resource type.

        :raises InvalidResourceTypeError: for an unknown resource type
        """
        mappers = {
            's3': self.map_s3_resources,
            'sqs': self.map_sqs_resources,
            'dynamodb': self.map_dynamodb_resources,
            'rds': self.map_rds_resources,
            'rds_snapshots': self.map_rds_snapshots,
            'route53': self.map_route53_resources,
        }
        mapper = mappers.get(resource_type)
        if mapper is None:
            raise InvalidResourceTypeError(resource_type)
        mapper()

    def map_s3_resources(self):
        """Record all buckets, and the unowned ones, per account."""
        for account, s in self.sessions.items():
            s3 = s.client('s3')
            buckets_list = s3.list_buckets()
            if 'Buckets' not in buckets_list:
                continue
            buckets = [b['Name'] for b in buckets_list['Buckets']]
            self.set_resouces(account, 's3', buckets)
            buckets_without_owner = \
                self.get_resources_without_owner(account, buckets)
            unfiltered_buckets = \
                self.custom_s3_filter(account, s3, buckets_without_owner)
            self.set_resouces(account, 's3_no_owner', unfiltered_buckets)

    def map_sqs_resources(self):
        """Record all queue URLs, and the unowned ones, per account."""
        for account, s in self.sessions.items():
            sqs = s.client('sqs')
            queues_list = sqs.list_queues()
            if 'QueueUrls' not in queues_list:
                continue
            queues = queues_list['QueueUrls']
            self.set_resouces(account, 'sqs', queues)
            queues_without_owner = \
                self.get_resources_without_owner(account, queues)
            unfiltered_queues = \
                self.custom_sqs_filter(account, sqs, queues_without_owner)
            self.set_resouces(account, 'sqs_no_owner', unfiltered_queues)

    def map_dynamodb_resources(self):
        """Record all tables, and the unowned ones, per account."""
        for account, s in self.sessions.items():
            dynamodb = s.client('dynamodb')
            tables = self.paginate(dynamodb, 'list_tables', 'TableNames')
            self.set_resouces(account, 'dynamodb', tables)
            tables_without_owner = \
                self.get_resources_without_owner(account, tables)
            unfiltered_tables = \
                self.custom_dynamodb_filter(
                    account,
                    s,
                    dynamodb,
                    tables_without_owner
                )
            self.set_resouces(account, 'dynamodb_no_owner', unfiltered_tables)

    def map_rds_resources(self):
        """Record all DB instances, and the unowned ones, per account."""
        for account, s in self.sessions.items():
            rds = s.client('rds')
            results = \
                self.paginate(rds, 'describe_db_instances', 'DBInstances')
            instances = [t['DBInstanceIdentifier'] for t in results]
            self.set_resouces(account, 'rds', instances)
            instances_without_owner = \
                self.get_resources_without_owner(account, instances)
            unfiltered_instances = \
                self.custom_rds_filter(account, rds, instances_without_owner)
            self.set_resouces(account, 'rds_no_owner', unfiltered_instances)

    def map_rds_snapshots(self):
        """Record all DB snapshots, and those whose instance is gone.

        Waits for the 'rds' mapping first, since a snapshot counts as
        unowned when its instance is not among the mapped RDS instances.
        """
        self.wait_for_resource('rds')
        for account, s in self.sessions.items():
            rds = s.client('rds')
            results = \
                self.paginate(rds, 'describe_db_snapshots', 'DBSnapshots')
            snapshots = [t['DBSnapshotIdentifier'] for t in results]
            self.set_resouces(account, 'rds_snapshots', snapshots)
            known_instances = set(self.resources[account]['rds'])
            snapshots_without_db = [
                t['DBSnapshotIdentifier'] for t in results
                if t['DBInstanceIdentifier'] not in known_instances
            ]
            unfiltered_snapshots = \
                self.custom_rds_snapshot_filter(account, rds,
                                                snapshots_without_db)
            self.set_resouces(account, 'rds_snapshots_no_owner',
                              unfiltered_snapshots)

    def map_route53_resources(self):
        """Record all hosted zones, each with its record sets, per account.
        """
        for account, s in self.sessions.items():
            client = s.client('route53')
            zones = list(
                self.paginate(client, 'list_hosted_zones', 'HostedZones'))
            for zone in zones:
                zone['records'] = \
                    self.paginate(client, 'list_resource_record_sets',
                                  'ResourceRecordSets',
                                  {'HostedZoneId': zone['Id']})
            self.set_resouces(account, 'route53', zones)

    def map_ecr_resources(self):
        """Record all ECR repositories per account."""
        for account, s in self.sessions.items():
            client = s.client('ecr')
            repositories = self.paginate(client=client,
                                         method='describe_repositories',
                                         key='repositories')
            self.set_resouces(account, 'ecr', repositories)

    @staticmethod
    def paginate(client, method, key, params=None):
        """ paginate returns an aggregated list of the specified key
        from all pages returned by executing the client's specified method.

        :param client: client exposing ``get_paginator``
        :param method: name of the client method to paginate
        :param key: key to collect from every page; pages lacking it
            contribute nothing
        :param params: optional keyword arguments for the paginated call
        """
        paginator = client.get_paginator(method)
        return [values
                for page in paginator.paginate(**(params or {}))
                for values in page.get(key, [])]

    def wait_for_resource(self, resource):
        """ wait_for_resource waits until the specified resource type
        is ready for all accounts.
        When we have more resource types then threads,
        this function will need to change to a dependency graph."""
        while any(self.resources[account].get(resource) is None
                  for account in self.sessions):
            time.sleep(2)

    def set_resouces(self, account, key, value):
        """Store ``value`` under ``key`` for ``account`` while holding the lock.
        """
        with self._lock:
            self.resources[account][key] = value

    def get_resources_without_owner(self, account, resources):
        """Return the resources that do not belong to a user of ``account``.
        """
        return [r for r in resources if not self.has_owner(account, r)]

    def has_owner(self, account, resource):
        """Tell whether ``resource`` is named after a user of ``account``.

        A resource is owned when its name starts with a user name, compared
        case-insensitively. For URLs the last path segment is checked too.
        """
        resource_name = resource.lower()
        # for URLs (e.g. queue URLs) the resource name is the last segment
        url_tail = resource_name.split('/')[-1] if '://' in resource else None
        for u in self.users[account]:
            user = u.lower()
            if resource_name.startswith(user):
                return True
            if url_tail is not None and url_tail.startswith(user):
                return True
        return False

    def custom_s3_filter(self, account, s3, buckets):
        """Return the buckets that are not protected by name or tags."""
        resource_type = 's3 bucket'
        unfiltered_buckets = []
        for b in buckets:
            try:
                tags = s3.get_bucket_tagging(Bucket=b)
            except botocore.exceptions.ClientError:
                # the tags could not be fetched: treat the bucket as untagged
                tags = {}
            if not self.should_filter(account, resource_type,
                                      b, tags, 'TagSet'):
                unfiltered_buckets.append(b)

        return unfiltered_buckets

    def custom_sqs_filter(self, account, sqs, queues):
        """Return the queues that are not protected by name or tags."""
        resource_type = 'sqs queue'
        unfiltered_queues = []
        for q in queues:
            tags = sqs.list_queue_tags(QueueUrl=q)
            if not self.should_filter(account, resource_type,
                                      q, tags, 'Tags'):
                unfiltered_queues.append(q)

        return unfiltered_queues

    def custom_dynamodb_filter(self, account, session, dynamodb, tables):
        """Return the tables that are not protected by name or tags."""
        resource_type = 'dynamodb table'
        dynamodb_resource = session.resource('dynamodb')
        unfiltered_tables = []
        for t in tables:
            table_arn = dynamodb_resource.Table(t).table_arn
            tags = dynamodb.list_tags_of_resource(ResourceArn=table_arn)
            if not self.should_filter(account, resource_type,
                                      t, tags, 'Tags'):
                unfiltered_tables.append(t)

        return unfiltered_tables

    def custom_rds_filter(self, account, rds, instances):
        """Return the DB instances that are not protected by name or tags.
        """
        resource_type = 'rds instance'
        unfiltered_instances = []
        for i in instances:
            instance = rds.describe_db_instances(DBInstanceIdentifier=i)
            instance_arn = instance['DBInstances'][0]['DBInstanceArn']
            tags = rds.list_tags_for_resource(ResourceName=instance_arn)
            if not self.should_filter(account, resource_type,
                                      i, tags, 'TagList'):
                unfiltered_instances.append(i)

        return unfiltered_instances

    def custom_rds_snapshot_filter(self, account, rds, snapshots):
        """Return the DB snapshots that are not protected by name or tags.
        """
        resource_type = 'rds snapshots'
        unfiltered_snapshots = []
        for s in snapshots:
            snapshot = rds.describe_db_snapshots(DBSnapshotIdentifier=s)
            snapshot_arn = snapshot['DBSnapshots'][0]['DBSnapshotArn']
            tags = rds.list_tags_for_resource(ResourceName=snapshot_arn)
            if not self.should_filter(account, resource_type,
                                      s, tags, 'TagList'):
                unfiltered_snapshots.append(s)

        return unfiltered_snapshots

    def should_filter(self, account, resource_type,
                      resource_name, resource_tags, tags_key):
        """Tell whether a resource must be left alone.

        :param resource_tags: response holding the tags under ``tags_key``
        :return: True when the resource name or its tags mark it as special
        """
        if self.resource_has_special_name(account, resource_type,
                                          resource_name):
            return True
        if tags_key in resource_tags:
            tags = resource_tags[tags_key]
            if self.resource_has_special_tags(account, resource_type,
                                              resource_name, tags):
                return True

        return False

    @staticmethod
    def resource_has_special_name(account, type, resource):
        """Tell whether the resource name contains an ignored marker."""
        skip_msg = '[{}] skipping {} '.format(account, type) + \
            '({} related) {}'

        ignore_names = {
            'production': ['prod'],
            'stage': ['stage', 'staging'],
            'terraform': ['terraform', '-tf-'],
        }

        resource_name = resource.lower()
        for msg, tags in ignore_names.items():
            for tag in tags:
                if tag.lower() in resource_name:
                    logging.debug(skip_msg.format(msg, resource))
                    return True

        return False

    def resource_has_special_tags(self, account, type, resource, tags):
        """Tell whether one of the resource tags holds an ignored value."""
        skip_msg = '[{}] skipping {} '.format(account, type) + \
            '({}={}) {}'

        ignore_tags = {
            'ENV': ['prod', 'stage', 'staging'],
            'environment': ['prod', 'stage', 'staging'],
            'owner': ['app-sre'],
            'managed_by_integration': [
                'terraform_resources',
                'terraform_users'
            ],
            'aws_gc_hands_off': ['true'],
        }

        for tag, ignore_values in ignore_tags.items():
            # the tag value does not depend on the ignore value: look it
            # up once per tag
            value = self.get_tag_value(tags, tag)
            for ignore_value in ignore_values:
                if ignore_value.lower() in value.lower():
                    logging.debug(skip_msg.format(tag, value, resource))
                    return True

        return False

    @staticmethod
    def get_tag_value(tags, tag):
        """Return the value of ``tag``, or '' when it is not present.

        :param tags: either a ``{key: value}`` dictionary or a list of
            ``{'Key': ..., 'Value': ...}`` dictionaries
        """
        if isinstance(tags, dict):
            return tags.get(tag, '')
        if isinstance(tags, list):
            for t in tags:
                if t['Key'] == tag:
                    return t['Value']

        return ''

    def delete_resources_without_owner(self, dry_run):
        """Log, and unless ``dry_run`` delete, every unowned resource."""
        for account, s in self.sessions.items():
            for rt in self.resource_types:
                for r in self.resources[account].get(rt + '_no_owner', []):
                    logging.info(['delete_resource', account, rt, r])
                    if not dry_run:
                        self.delete_resource(s, rt, r)

    def delete_resource(self, session, resource_type, resource_name):
        """Delete a single resource of the given type.

        :raises InvalidResourceTypeError: for an unknown resource type
        """
        if resource_type == 's3':
            resource = session.resource(resource_type)
            self.delete_bucket(resource, resource_name)
        elif resource_type == 'sqs':
            client = session.client(resource_type)
            self.delete_queue(client, resource_name)
        elif resource_type == 'dynamodb':
            resource = session.resource(resource_type)
            self.delete_table(resource, resource_name)
        elif resource_type == 'rds':
            client = session.client(resource_type)
            self.delete_instance(client, resource_name)
        elif resource_type == 'rds_snapshots':
            # 'rds_snapshots' is an internal resource type, not an AWS
            # service name: snapshots are deleted through the 'rds' client
            client = session.client('rds')
            self.delete_snapshot(client, resource_name)
        else:
            raise InvalidResourceTypeError(resource_type)

    @staticmethod
    def delete_bucket(s3, bucket_name):
        """Delete every object of the bucket, then the bucket itself."""
        bucket = s3.Bucket(bucket_name)
        for key in bucket.objects.all():
            key.delete()
        bucket.delete()

    @staticmethod
    def delete_queue(sqs, queue_url):
        """Delete the queue with the given URL."""
        sqs.delete_queue(QueueUrl=queue_url)

    @staticmethod
    def delete_table(dynamodb, table_name):
        """Delete the table with the given name."""
        table = dynamodb.Table(table_name)
        table.delete()

    @staticmethod
    def delete_instance(rds, instance_name):
        """Delete the DB instance, skipping the final snapshot and deleting
        its automated backups."""
        rds.delete_db_instance(
            DBInstanceIdentifier=instance_name,
            SkipFinalSnapshot=True,
            DeleteAutomatedBackups=True
        )

    @staticmethod
    def delete_snapshot(rds, snapshot_identifier):
        """Delete the DB snapshot with the given identifier."""
        rds.delete_db_snapshot(
            DBSnapshotIdentifier=snapshot_identifier
        )

    @staticmethod
    def determine_key_type(iam, user):
        """Classify the keys of ``user`` by its managed_by_integration tag.

        :return: 'unmanaged', 'user' or 'service_account'
        :raises InvalidResourceTypeError: for an unrecognized tag value
        """
        tags = iam.list_user_tags(UserName=user)['Tags']
        managed_by_integration_tag = \
            [t['Value'] for t in tags if t['Key'] == 'managed_by_integration']
        # if this key belongs to a user without tags, i.e. not
        # managed by an integration, this key is probably created
        # manually. disable it to leave a trace
        if not managed_by_integration_tag:
            return 'unmanaged'
        # if this key belongs to a user created by the
        # 'terraform-users' integration, we just delete the key
        if managed_by_integration_tag[0] == 'terraform_users':
            return 'user'
        # if this key belongs to a user created by the
        # 'terraform-resources' integration, we remove
        # the key from terraform state and let it create
        # a new one on its own
        if managed_by_integration_tag[0] == 'terraform_resources':
            return 'service_account'

        message = 'unrecognized managed_by_integration tag: {}'.format(
            managed_by_integration_tag[0])
        raise InvalidResourceTypeError(message)

    def delete_keys(self, dry_run, keys_to_delete, working_dirs,
                    disable_service_account_keys):
        """Disable, delete or rotate the requested access keys.

        :param dry_run: when true, only log the intended actions
        :param keys_to_delete: ``{account: [access key id, ...]}``
        :param working_dirs: passed to ``terraform.state_rm_access_key``
        :param disable_service_account_keys: when true, disable the active
            key of a service account that has two keys instead of
            reporting an error
        :return: True when a key had to be skipped to avoid an error
        """
        error = False
        users_keys = self.get_users_keys()
        for account, s in self.sessions.items():
            iam = s.client('iam')
            keys = keys_to_delete.get(account, [])
            for key in keys:
                matches = [(user, user_keys)
                           for user, user_keys
                           in users_keys[account].items()
                           if key in user_keys]
                if not matches:
                    continue
                # unpack single item from sequence
                # since only a single user can have a given key
                [(user, user_keys)] = matches
                key_type = self.determine_key_type(iam, user)
                key_status = self.get_user_key_status(iam, user, key)
                if key_type == 'unmanaged' and key_status == 'Active':
                    logging.info(['disable_key', account, user, key])

                    if not dry_run:
                        iam.update_access_key(
                            UserName=user,
                            AccessKeyId=key,
                            Status='Inactive'
                        )
                elif key_type == 'user':
                    logging.info(['delete_key', account, user, key])

                    if not dry_run:
                        iam.delete_access_key(
                            UserName=user,
                            AccessKeyId=key
                        )
                elif key_type == 'service_account':
                    # if key is disabled - delete it
                    # this will happen after terraform-resources ran,
                    # provisioned a new key, updated the output Secret,
                    # recycled the pods and disabled the key.
                    if key_status == 'Inactive':
                        logging.info(['delete_inactive_key',
                                      account, user, key])
                        if not dry_run:
                            iam.delete_access_key(
                                UserName=user,
                                AccessKeyId=key
                            )
                        continue

                    # if key is active and it is the only one -
                    # remove it from terraform state. terraform-resources
                    # will provision a new one.
                    # may be a race condition here. TODO: check it
                    if len(user_keys) == 1:
                        logging.info(['remove_from_state',
                                      account, user, key])
                        if not dry_run:
                            terraform.state_rm_access_key(
                                working_dirs, account, user
                            )

                    # if user has 2 keys and we remove the key from
                    # terraform state, terraform-resources will not
                    # be able to provision a new key - limbo.
                    # this state should happen when terraform-resources
                    # is running, provisioned a new key,
                    # but did not disable the old key yet.
                    if len(user_keys) == 2:
                        # if true, this is a call made by terraform-resources
                        # itself. disable the key and proceed. the key will be
                        # deleted in a following iteration of aws-iam-keys.
                        if disable_service_account_keys:
                            logging.info(['disable_key', account, user, key])

                            if not dry_run:
                                iam.update_access_key(
                                    UserName=user,
                                    AccessKeyId=key,
                                    Status='Inactive'
                                )
                        else:
                            msg = \
                                'user {} has 2 keys, skipping to avoid error'
                            logging.error(msg.format(user))
                            error = True

        return error

    def get_users_keys(self):
        """Return ``{account: {user: [access key id, ...]}}``."""
        users_keys = {}
        for account, s in self.sessions.items():
            iam = s.client('iam')
            users_keys[account] = {user: self.get_user_keys(iam, user)
                                   for user in self.users[account]}

        return users_keys

    @staticmethod
    def get_user_keys(iam, user):
        """Return the access key ids of ``user``."""
        key_list = iam.list_access_keys(UserName=user)['AccessKeyMetadata']
        return [uk['AccessKeyId'] for uk in key_list]

    @staticmethod
    def get_user_key_status(iam, user, key):
        """Return the status of the access key ``key`` of ``user``."""
        key_list = iam.list_access_keys(UserName=user)['AccessKeyMetadata']
        return [k['Status'] for k in key_list if k['AccessKeyId'] == key][0]

    def get_support_cases(self):
        """Return the support cases of every account with premium support.

        Accounts whose cases cannot be fetched are logged and left out.
        """
        all_support_cases = {}
        for account, s in self.sessions.items():
            if not self.accounts[account].get('premiumSupport'):
                continue
            try:
                support = s.client('support')
                support_cases = support.describe_cases(
                    includeResolvedCases=True,
                    includeCommunications=True
                )['cases']
                all_support_cases[account] = support_cases
            except Exception as e:
                msg = '[{}] error getting support cases. details: {}'
                logging.error(msg.format(account, str(e)))

        return all_support_cases

    def init_ecr_auth_tokens(self, accounts):
        """Fetch an ECR authorization token per account and ECR region.

        Sets ``self.auth_tokens`` keyed by ``<account name>/<region>``.
        Nothing is set when no account declares ``ecrs``.
        """
        accounts_with_ecr = [a for a in accounts if a.get('ecrs')]
        if not accounts_with_ecr:
            return

        auth_tokens = {}
        results = threaded.run(self.get_tf_secrets, accounts_with_ecr,
                               self.thread_pool_size)
        account_secrets = dict(results)
        for account in accounts_with_ecr:
            account_name = account['name']
            account_secret = account_secrets[account_name]
            access_key = account_secret['aws_access_key_id']
            secret_key = account_secret['aws_secret_access_key']

            for ecr in account['ecrs']:
                region_name = ecr['region']
                session = boto3.Session(
                    aws_access_key_id=access_key,
                    aws_secret_access_key=secret_key,
                    region_name=region_name,
                )
                client = session.client('ecr')
                token = client.get_authorization_token()
                auth_tokens[f"{account_name}/{region_name}"] = token

        self.auth_tokens = auth_tokens

    @staticmethod
    def _tag_filters(tags):
        """Build ``tag:<key>`` filters from ``tags``; None yields no filter.
        """
        return [
            {'Name': f'tag:{k}', 'Values': [v]}
            for k, v in (tags or {}).items()
        ]

    def get_cluster_vpc_id(self, account, route_tables=False):
        """
        Returns a cluster VPC ID.
        :param account: a dictionary containing the following keys:
                        - name - name of the AWS account
                        - assume_role - role to assume to get access
                                        to the cluster's AWS account
                        - assume_region - region in which to operate
                        - assume_cidr - CIDR block of the cluster to
                                        use to find the matching VPC
        :param route_tables: when true, also look up the route table ids
                             of the matching VPC
        :return: tuple of (vpc id, route table ids); each is None when it
                 was not found or not requested
        :raises KeyError: when a required key is missing or the role ARN
                          is empty
        """
        required_keys = \
            ['name', 'assume_role', 'assume_region', 'assume_cidr']
        if any(key not in account for key in required_keys):
            account_name = account.get('name')
            raise KeyError(
                '[{}] account is missing required keys'.format(account_name))

        session = self.get_session(account['name'])
        sts = session.client('sts')
        role_arn = account['assume_role']
        if not role_arn:
            raise KeyError(
                'Could not find Role ARN. This is likely caused '
                'due to a missing awsInfrastructureAccess section.'
            )
        role_name = role_arn.split('/')[1]
        response = sts.assume_role(
            RoleArn=role_arn,
            RoleSessionName=role_name
        )
        credentials = response['Credentials']

        assumed_session = boto3.Session(
            aws_access_key_id=credentials['AccessKeyId'],
            aws_secret_access_key=credentials['SecretAccessKey'],
            aws_session_token=credentials['SessionToken'],
            region_name=account['assume_region']
        )
        assumed_ec2 = assumed_session.client('ec2')
        vpcs = assumed_ec2.describe_vpcs()
        vpc_id = None
        for vpc in vpcs.get('Vpcs', []):
            if vpc['CidrBlock'] == account['assume_cidr']:
                vpc_id = vpc['VpcId']
                break

        route_table_ids = None
        if route_tables and vpc_id:
            vpc_route_tables = assumed_ec2.describe_route_tables(
                Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}]
            )
            route_table_ids = [rt['RouteTableId']
                               for rt in vpc_route_tables['RouteTables']]

        return vpc_id, route_table_ids

    def get_vpcs_details(self, account, tags=None, route_tables=False):
        """Describe the VPCs of ``account`` in every region.

        :param account: dictionary holding the account ``name``
        :param tags: optional ``{key: value}`` tags the VPCs must carry;
            None applies no tag filter
        :param route_tables: when true, include the route table ids
        :return: list of dictionaries with ``vpc_id``, ``region``,
            ``cidr_block`` and ``route_table_ids``
        """
        results = []
        session = self.get_session(account['name'])
        ec2 = session.client('ec2')
        regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']]
        filters = self._tag_filters(tags)
        for region_name in regions:
            ec2 = session.client('ec2', region_name=region_name)
            vpcs = ec2.describe_vpcs(Filters=filters)
            for vpc in vpcs.get('Vpcs', []):
                vpc_id = vpc['VpcId']
                route_table_ids = None
                if route_tables:
                    vpc_route_tables = ec2.describe_route_tables(
                        Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}]
                    )
                    route_table_ids = [
                        rt['RouteTableId']
                        for rt in vpc_route_tables['RouteTables']
                    ]
                results.append({
                    'vpc_id': vpc_id,
                    'region': region_name,
                    'cidr_block': vpc['CidrBlock'],
                    'route_table_ids': route_table_ids,
                })

        return results

    def get_tgws_details(self, account, region_name,
                         tags=None, route_tables=False):
        """Describe the transit gateways of ``account`` in a region.

        :param account: dictionary holding the account ``name``
        :param region_name: region to query
        :param tags: optional ``{key: value}`` tags the gateways must carry;
            None applies no tag filter
        :param route_tables: accepted but not used by this method
        :return: list of dictionaries with ``tgw_id``, ``tgw_arn`` and
            ``region``
        """
        session = self.get_session(account['name'])
        ec2 = session.client('ec2', region_name=region_name)
        tgws = ec2.describe_transit_gateways(
            Filters=self._tag_filters(tags)
        )
        return [
            {
                'tgw_id': tgw['TransitGatewayId'],
                'tgw_arn': tgw['TransitGatewayArn'],
                'region': region_name,
            }
            for tgw in tgws.get('TransitGateways', [])
        ]

    def get_route53_zones(self):
        """
        Return the mapped Route53 DNS zones of every account

        :return: account name -> list of route53 dns zones (empty when the
                 zones of the account have not been mapped)
        :rtype: dict of (str, list)
        """
        return {
            account: self.resources.get(account, {}).get('route53', [])
            for account in self.sessions
        }

    def create_route53_zone(self, account_name, zone_name):
        """
        Create a Route53 DNS zone

        Failures are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_name: name of the zone to create
        :type account_name: str
        :type zone_name: str
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            caller_ref = f"{datetime.now()}"
            client.create_hosted_zone(
                Name=zone_name,
                CallerReference=caller_ref,
                HostedZoneConfig={
                    'Comment': 'Managed by App-Interface',
                },
            )
        except client.exceptions.InvalidDomainName:
            logging.error(f'[{account_name}] invalid domain name {zone_name}')
        except client.exceptions.HostedZoneAlreadyExists:
            logging.error(
                f'[{account_name}] hosted zone already exists: {zone_name}'
            )
        except client.exceptions.TooManyHostedZones:
            logging.error(f'[{account_name}] too many hosted zones in account')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def delete_route53_zone(self, account_name, zone_id):
        """
        Delete a Route53 DNS zone

        Failures are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to delete
        :type account_name: str
        :type zone_id: str
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.delete_hosted_zone(Id=zone_id)
        except client.exceptions.NoSuchHostedZone:
            logging.error(f'[{account_name}] Error trying to delete '
                          f'unknown DNS zone {zone_id}')
        except client.exceptions.HostedZoneNotEmpty:
            logging.error(f'[{account_name}] Cannot delete DNS zone that '
                          f'is not empty {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def delete_route53_record(self, account_name, zone_id, awsdata):
        """
        Delete a Route53 DNS zone record

        Failures are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to operate on
        :param awsdata: aws record data of the record to delete
        :type account_name: str
        :type zone_id: str
        :type awsdata: dict
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.change_resource_record_sets(
                HostedZoneId=zone_id,
                ChangeBatch={
                    'Changes': [{
                        'Action': 'DELETE',
                        'ResourceRecordSet': awsdata,
                    }]
                }
            )
        except client.exceptions.NoSuchHostedZone:
            logging.error(f'[{account_name}] Error trying to delete record: '
                          f'unknown DNS zone {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def upsert_route53_record(self, account_name, zone_id, recordset):
        """
        Upsert a Route53 DNS zone record

        Failures are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to operate on
        :param recordset: aws record data of the record to create or update
        :type account_name: str
        :type zone_id: str
        :type recordset: dict
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.change_resource_record_sets(
                HostedZoneId=zone_id,
                ChangeBatch={
                    'Changes': [{
                        'Action': 'UPSERT',
                        'ResourceRecordSet': recordset,
                    }]
                }
            )
        except client.exceptions.NoSuchHostedZone:
            logging.error(f'[{account_name}] Error trying to upsert record: '
                          f'unknown DNS zone {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')
class AWSApi:
    """Wrapper around AWS SDK

    Keeps one session per account (keyed by account name) in
    ``self.sessions`` and a per-account dictionary of discovered resources
    in ``self.resources``. On top of those it offers helpers for resource
    garbage collection, IAM key handling, VPC / Transit Gateway discovery
    and Route53 zone management.
    """

    def __init__(self, thread_pool_size, accounts, settings=None,
                 init_ecr_auth_tokens=False, init_users=True):
        """
        :param thread_pool_size: pool size handed to ``threaded.run``
        :param accounts: account dictionaries; each needs at least
                         ``name`` and ``automationToken``
        :param settings: settings passed to the secret reader
        :param init_ecr_auth_tokens: also fetch ECR authorization tokens
        :param init_users: also list the IAM users of every account
        """
        self.thread_pool_size = thread_pool_size
        self.secret_reader = SecretReader(settings=settings)
        self.init_sessions_and_resources(accounts)
        if init_ecr_auth_tokens:
            self.init_ecr_auth_tokens(accounts)
        if init_users:
            self.init_users()
        self._lock = Lock()
        self.resource_types = \
            ['s3', 'sqs', 'dynamodb', 'rds', 'rds_snapshots']

        # store the app-interface accounts in a dictionary indexed by name
        self.accounts = {acc['name']: acc for acc in accounts}

        # Setup caches on the instance itself to avoid leak
        # https://stackoverflow.com/questions/33672412/python-functools-lru-cache-with-class-methods-release-object
        # using @lru_cache decorators on methods would leak AWSApi instances
        # since the cache keeps a reference to self.
        self._account_ec2_client = functools.lru_cache()(
            self._account_ec2_client)
        self._get_assumed_role_client = functools.lru_cache()(
            self._get_assumed_role_client)
        self.get_account_vpcs = functools.lru_cache()(self.get_account_vpcs)
        self.get_vpc_route_tables = functools.lru_cache()(
            self.get_vpc_route_tables)
        self.get_vpc_subnets = functools.lru_cache()(self.get_vpc_subnets)
        self.get_vpc_default_sg_id = functools.lru_cache()(
            self.get_vpc_default_sg_id)
        self.get_transit_gateways = functools.lru_cache()(
            self.get_transit_gateways)
        self.get_transit_gateway_vpc_attachments = functools.lru_cache()(
            self.get_transit_gateway_vpc_attachments)

    def init_sessions_and_resources(self, accounts: Iterable[Account]):
        """Create a session and an empty resource map for every account."""
        results = threaded.run(self.get_tf_secrets, accounts,
                               self.thread_pool_size)
        self.sessions: Dict[str, Session] = {}
        self.resources: Dict[str, Any] = {}
        for account, secret in results:
            access_key = secret['aws_access_key_id']
            secret_key = secret['aws_secret_access_key']
            region_name = secret['region']
            session = Session(
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
                region_name=region_name,
            )
            self.sessions[account] = session
            self.resources[account] = {}

    def get_session(self, account: str) -> Session:
        """Return the session of the given account name (KeyError if
        the account is unknown)."""
        return self.sessions[account]

    # pylint: disable=method-hidden
    def _account_ec2_client(self, account_name: str,
                            region_name: Optional[str] = None) -> EC2Client:
        """Return an EC2 client for the account, in ``region_name`` or,
        when not given, in the session's own region."""
        session = self.get_session(account_name)
        region = region_name if region_name else session.region_name
        return session.client('ec2', region_name=region)

    def get_tf_secrets(self, account):
        """Read the account's automation token secret.

        :return: tuple of (account name, secret content)
        """
        account_name = account['name']
        automation_token = account['automationToken']
        secret = self.secret_reader.read_all(automation_token)
        return (account_name, secret)

    def init_users(self):
        """Populate ``self.users`` with the IAM user names per account."""
        self.users = {}
        for account, s in self.sessions.items():
            iam = s.client('iam')
            users = self.paginate(iam, 'list_users', 'Users')
            users = [u['UserName'] for u in users]
            self.users[account] = users

    def simulate_deleted_users(self, io_dir):
        """Remove from ``self.users`` the users listed in the
        ``<integration>.json`` files found in ``io_dir`` (if any)."""
        src_integrations = ['terraform_resources', 'terraform_users']
        if not os.path.exists(io_dir):
            return
        for i in src_integrations:
            file_path = os.path.join(io_dir, i + '.json')
            if not os.path.exists(file_path):
                continue
            with open(file_path, 'r') as f:
                deleted_users = json.load(f)
            for deleted_user in deleted_users:
                delete_from_account = deleted_user['account']
                delete_user = deleted_user['user']
                self.users[delete_from_account].remove(delete_user)

    def map_resources(self):
        """Map every type in ``self.resource_types`` using the thread pool."""
        threaded.run(self.map_resource, self.resource_types,
                     self.thread_pool_size)

    def map_resource(self, resource_type):
        """Dispatch to the mapper of the given resource type.

        :raises InvalidResourceTypeError: for an unknown resource type
        """
        if resource_type == 's3':
            self.map_s3_resources()
        elif resource_type == 'sqs':
            self.map_sqs_resources()
        elif resource_type == 'dynamodb':
            self.map_dynamodb_resources()
        elif resource_type == 'rds':
            self.map_rds_resources()
        elif resource_type == 'rds_snapshots':
            self.map_rds_snapshots()
        elif resource_type == 'route53':
            self.map_route53_resources()
        else:
            raise InvalidResourceTypeError(resource_type)

    def map_s3_resources(self):
        """Record all buckets ('s3') and the unfiltered buckets without
        an owner ('s3_no_owner') per account."""
        for account, s in self.sessions.items():
            s3 = s.client('s3')
            buckets_list = s3.list_buckets()
            if 'Buckets' not in buckets_list:
                continue
            buckets = [b['Name'] for b in buckets_list['Buckets']]
            self.set_resouces(account, 's3', buckets)
            buckets_without_owner = \
                self.get_resources_without_owner(account, buckets)
            unfiltered_buckets = \
                self.custom_s3_filter(account, s3, buckets_without_owner)
            self.set_resouces(account, 's3_no_owner', unfiltered_buckets)

    def map_sqs_resources(self):
        """Record all queue URLs ('sqs') and the unfiltered queues without
        an owner ('sqs_no_owner') per account."""
        for account, s in self.sessions.items():
            sqs = s.client('sqs')
            queues_list = sqs.list_queues()
            if 'QueueUrls' not in queues_list:
                continue
            queues = queues_list['QueueUrls']
            self.set_resouces(account, 'sqs', queues)
            queues_without_owner = \
                self.get_resources_without_owner(account, queues)
            unfiltered_queues = \
                self.custom_sqs_filter(account, sqs, queues_without_owner)
            self.set_resouces(account, 'sqs_no_owner', unfiltered_queues)

    def map_dynamodb_resources(self):
        """Record all tables ('dynamodb') and the unfiltered tables without
        an owner ('dynamodb_no_owner') per account."""
        for account, s in self.sessions.items():
            dynamodb = s.client('dynamodb')
            tables = self.paginate(dynamodb, 'list_tables', 'TableNames')
            self.set_resouces(account, 'dynamodb', tables)
            tables_without_owner = \
                self.get_resources_without_owner(account, tables)
            unfiltered_tables = \
                self.custom_dynamodb_filter(
                    account,
                    s,
                    dynamodb,
                    tables_without_owner
                )
            self.set_resouces(account, 'dynamodb_no_owner', unfiltered_tables)

    def map_rds_resources(self):
        """Record all DB instances ('rds') and the unfiltered instances
        without an owner ('rds_no_owner') per account."""
        for account, s in self.sessions.items():
            rds = s.client('rds')
            results = \
                self.paginate(rds, 'describe_db_instances', 'DBInstances')
            instances = [t['DBInstanceIdentifier'] for t in results]
            self.set_resouces(account, 'rds', instances)
            instances_without_owner = \
                self.get_resources_without_owner(account, instances)
            unfiltered_instances = \
                self.custom_rds_filter(account, rds, instances_without_owner)
            self.set_resouces(account, 'rds_no_owner', unfiltered_instances)

    def map_rds_snapshots(self):
        """Record all DB snapshots ('rds_snapshots') and the unfiltered
        snapshots whose DB instance is not mapped
        ('rds_snapshots_no_owner') per account."""
        # the 'rds' mapping is needed to tell which snapshots are orphaned
        self.wait_for_resource('rds')
        for account, s in self.sessions.items():
            rds = s.client('rds')
            results = \
                self.paginate(rds, 'describe_db_snapshots', 'DBSnapshots')
            snapshots = [t['DBSnapshotIdentifier'] for t in results]
            self.set_resouces(account, 'rds_snapshots', snapshots)
            snapshots_without_db = [
                t['DBSnapshotIdentifier'] for t in results
                if t['DBInstanceIdentifier'] not in
                self.resources[account]['rds']
            ]
            unfiltered_snapshots = \
                self.custom_rds_snapshot_filter(account, rds,
                                                snapshots_without_db)
            self.set_resouces(account, 'rds_snapshots_no_owner',
                              unfiltered_snapshots)

    def map_route53_resources(self):
        """Record the hosted zones of every account under 'route53', each
        zone carrying its record sets under the 'records' key."""
        for account, s in self.sessions.items():
            client = s.client('route53')
            results = \
                self.paginate(client, 'list_hosted_zones', 'HostedZones')
            zones = list(results)
            for zone in zones:
                results = \
                    self.paginate(client, 'list_resource_record_sets',
                                  'ResourceRecordSets',
                                  {'HostedZoneId': zone['Id']})
                zone['records'] = results
            self.set_resouces(account, 'route53', zones)

    def map_ecr_resources(self):
        """Record the ECR repositories of every account under 'ecr'."""
        for account, s in self.sessions.items():
            client = s.client('ecr')
            repositories = self.paginate(client=client,
                                         method='describe_repositories',
                                         key='repositories')
            self.set_resouces(account, 'ecr', repositories)

    @staticmethod
    def paginate(client, method, key, params=None):
        """ paginate returns an aggregated list of the specified key
        from all pages returned by executing the client's specified method.

        :param client: client object exposing ``get_paginator``
        :param method: name of the client method to paginate
        :param key: key to collect from each page (missing keys are skipped)
        :param params: optional keyword arguments for the paginated call
        """
        # None instead of a shared mutable default dictionary
        call_params = params or {}
        paginator = client.get_paginator(method)
        return [
            values
            for page in paginator.paginate(**call_params)
            for values in page.get(key, [])
        ]

    def wait_for_resource(self, resource):
        """ wait_for_resource waits until the specified resource type
        is ready for all accounts.
        When we have more resource types than threads,
        this function will need to change to a dependency graph."""
        wait = True
        while wait:
            wait = False
            for account in self.sessions:
                if self.resources[account].get(resource) is None:
                    wait = True
            if wait:
                time.sleep(2)

    def set_resouces(self, account, key, value):
        """Store ``value`` under ``key`` for the account, under the lock
        (mappers run concurrently via ``map_resources``)."""
        with self._lock:
            self.resources[account][key] = value

    def get_resources_without_owner(self, account, resources):
        """Return the resources for which ``has_owner`` is False."""
        return [r for r in resources if not self.has_owner(account, r)]

    def has_owner(self, account, resource):
        """Tell whether the resource name starts with the name of one of
        the account's users (case-insensitive).

        For URL-like resources (containing '://') the last path segment
        is matched as well, also case-insensitively.
        """
        resource_lower = resource.lower()
        last_segment = None
        if '://' in resource:
            last_segment = resource_lower.split('/')[-1]
        for u in self.users[account]:
            user_lower = u.lower()
            if resource_lower.startswith(user_lower):
                return True
            if last_segment is not None and \
                    last_segment.startswith(user_lower):
                return True
        return False

    def custom_s3_filter(self, account, s3, buckets):
        """Return the buckets that ``should_filter`` does not exclude.
        Buckets whose tags cannot be fetched are treated as untagged."""
        resource_type = 's3 bucket'
        unfiltered_buckets = []
        for b in buckets:
            try:
                tags = s3.get_bucket_tagging(Bucket=b)
            except botocore.exceptions.ClientError:
                tags = {}
            if not self.should_filter(account, resource_type,
                                      b, tags, 'TagSet'):
                unfiltered_buckets.append(b)

        return unfiltered_buckets

    def custom_sqs_filter(self, account, sqs, queues):
        """Return the queues that ``should_filter`` does not exclude."""
        resource_type = 'sqs queue'
        unfiltered_queues = []
        for q in queues:
            tags = sqs.list_queue_tags(QueueUrl=q)
            if not self.should_filter(account, resource_type,
                                      q, tags, 'Tags'):
                unfiltered_queues.append(q)

        return unfiltered_queues

    def custom_dynamodb_filter(self, account, session, dynamodb, tables):
        """Return the tables that ``should_filter`` does not exclude."""
        resource_type = 'dynamodb table'
        dynamodb_resource = session.resource('dynamodb')
        unfiltered_tables = []
        for t in tables:
            table_arn = dynamodb_resource.Table(t).table_arn
            tags = dynamodb.list_tags_of_resource(ResourceArn=table_arn)
            if not self.should_filter(account, resource_type,
                                      t, tags, 'Tags'):
                unfiltered_tables.append(t)

        return unfiltered_tables

    def custom_rds_filter(self, account, rds, instances):
        """Return the instances that ``should_filter`` does not exclude."""
        resource_type = 'rds instance'
        unfiltered_instances = []
        for i in instances:
            instance = rds.describe_db_instances(DBInstanceIdentifier=i)
            instance_arn = instance['DBInstances'][0]['DBInstanceArn']
            tags = rds.list_tags_for_resource(ResourceName=instance_arn)
            if not self.should_filter(account, resource_type,
                                      i, tags, 'TagList'):
                unfiltered_instances.append(i)

        return unfiltered_instances

    def custom_rds_snapshot_filter(self, account, rds, snapshots):
        """Return the snapshots that ``should_filter`` does not exclude."""
        resource_type = 'rds snapshots'
        unfiltered_snapshots = []
        for s in snapshots:
            snapshot = rds.describe_db_snapshots(DBSnapshotIdentifier=s)
            snapshot_arn = snapshot['DBSnapshots'][0]['DBSnapshotArn']
            tags = rds.list_tags_for_resource(ResourceName=snapshot_arn)
            if not self.should_filter(account, resource_type,
                                      s, tags, 'TagList'):
                unfiltered_snapshots.append(s)

        return unfiltered_snapshots

    def should_filter(self, account, resource_type,
                      resource_name, resource_tags, tags_key):
        """Tell whether a resource must be left alone, either because of
        its name or because of the tags found under ``tags_key``."""
        if self.resource_has_special_name(account, resource_type,
                                          resource_name):
            return True
        if tags_key in resource_tags:
            tags = resource_tags[tags_key]
            if self.resource_has_special_tags(account, resource_type,
                                              resource_name, tags):
                return True

        return False

    @staticmethod
    def resource_has_special_name(account, type, resource):
        """Tell whether the resource name contains one of the ignored
        markers (case-insensitive); a match is logged at debug level."""
        skip_msg = '[{}] skipping {} '.format(account, type) + \
            '({} related) {}'

        ignore_names = {
            'production': ['prod'],
            'stage': ['stage', 'staging'],
            'terraform': ['terraform', '-tf-'],
        }

        for msg, tags in ignore_names.items():
            for tag in tags:
                if tag.lower() in resource.lower():
                    logging.debug(skip_msg.format(msg, resource))
                    return True

        return False

    def resource_has_special_tags(self, account, type, resource, tags):
        """Tell whether one of the ignored tags carries one of its ignored
        values (substring match, case-insensitive); a match is logged at
        debug level."""
        skip_msg = '[{}] skipping {} '.format(account, type) + \
            '({}={}) {}'

        ignore_tags = {
            'ENV': ['prod', 'stage', 'staging'],
            'environment': ['prod', 'stage', 'staging'],
            'owner': ['app-sre'],
            'managed_by_integration': ['terraform_resources',
                                       'terraform_users'],
            'aws_gc_hands_off': ['true'],
        }

        for tag, ignore_values in ignore_tags.items():
            # the tag value does not depend on the ignore value: look it
            # up once per tag
            value = self.get_tag_value(tags, tag)
            for ignore_value in ignore_values:
                if ignore_value.lower() in value.lower():
                    logging.debug(skip_msg.format(tag, value, resource))
                    return True

        return False

    @staticmethod
    def get_tag_value(tags, tag):
        """Return the value of ``tag`` from either a plain dictionary or a
        list of ``{'Key': ..., 'Value': ...}`` items; '' when not found."""
        if isinstance(tags, dict):
            return tags.get(tag, '')
        elif isinstance(tags, list):
            for t in tags:
                if t['Key'] == tag:
                    return t['Value']

        return ''

    def delete_resources_without_owner(self, dry_run):
        """Delete (or, with ``dry_run``, only log) every resource mapped
        under a '<type>_no_owner' key."""
        for account, s in self.sessions.items():
            for rt in self.resource_types:
                for r in self.resources[account].get(rt + '_no_owner', []):
                    logging.info(['delete_resource', account, rt, r])
                    if not dry_run:
                        self.delete_resource(s, rt, r)

    def delete_resource(self, session, resource_type, resource_name):
        """Delete a single resource of the given type.

        :raises InvalidResourceTypeError: for an unknown resource type
        """
        if resource_type == 's3':
            resource = session.resource(resource_type)
            self.delete_bucket(resource, resource_name)
        elif resource_type == 'sqs':
            client = session.client(resource_type)
            self.delete_queue(client, resource_name)
        elif resource_type == 'dynamodb':
            resource = session.resource(resource_type)
            self.delete_table(resource, resource_name)
        elif resource_type == 'rds':
            client = session.client(resource_type)
            self.delete_instance(client, resource_name)
        elif resource_type == 'rds_snapshots':
            # 'rds_snapshots' is our own resource type, not an AWS
            # service name: snapshots are deleted through the rds client
            client = session.client('rds')
            self.delete_snapshot(client, resource_name)
        else:
            raise InvalidResourceTypeError(resource_type)

    @staticmethod
    def delete_bucket(s3, bucket_name):
        """Delete all object versions of the bucket, then the bucket."""
        bucket = s3.Bucket(bucket_name)
        bucket.object_versions.delete()
        bucket.delete()

    @staticmethod
    def delete_queue(sqs, queue_url):
        """Delete the queue with the given URL."""
        sqs.delete_queue(QueueUrl=queue_url)

    @staticmethod
    def delete_table(dynamodb, table_name):
        """Delete the table with the given name."""
        table = dynamodb.Table(table_name)
        table.delete()

    @staticmethod
    def delete_instance(rds, instance_name):
        """Delete the DB instance, skipping the final snapshot and
        deleting automated backups."""
        rds.delete_db_instance(DBInstanceIdentifier=instance_name,
                               SkipFinalSnapshot=True,
                               DeleteAutomatedBackups=True)

    @staticmethod
    def delete_snapshot(rds, snapshot_identifier):
        """Delete the DB snapshot with the given identifier."""
        rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot_identifier)

    @staticmethod
    def determine_key_type(iam, user):
        """Classify the user owning a key by its 'managed_by_integration'
        tag.

        :return: 'unmanaged', 'user' or 'service_account'
        :raises InvalidResourceTypeError: for an unrecognized tag value
        """
        tags = iam.list_user_tags(UserName=user)['Tags']
        managed_by_integration_tag = \
            [t['Value'] for t in tags
             if t['Key'] == 'managed_by_integration']
        # if this key belongs to a user without tags, i.e. not
        # managed by an integration, this key is probably created
        # manually. disable it to leave a trace
        if not managed_by_integration_tag:
            return 'unmanaged'
        # if this key belongs to a user created by the
        # 'terraform-users' integration, we just delete the key
        if managed_by_integration_tag[0] == 'terraform_users':
            return 'user'
        # if this key belongs to a user created by the
        # 'terraform-resources' integration, we remove
        # the key from terraform state and let it create
        # a new one on its own
        if managed_by_integration_tag[0] == 'terraform_resources':
            return 'service_account'

        huh = 'unrecognized managed_by_integration tag: {}'.format(
            managed_by_integration_tag[0])
        raise InvalidResourceTypeError(huh)

    def delete_keys(self, dry_run, keys_to_delete, working_dirs,
                    disable_service_account_keys):
        """Disable, delete or remove from terraform state the requested
        access keys, depending on the type of the owning user.

        :param dry_run: only log the actions
        :param keys_to_delete: dictionary of account name -> list of keys
        :param working_dirs: passed to ``terraform.state_rm_access_key``
        :param disable_service_account_keys: allow disabling a service
                                             account key when the user
                                             has two keys
        :return: True if a key had to be skipped, False otherwise
        """
        error = False
        users_keys = self.get_users_keys()
        for account, s in self.sessions.items():
            iam = s.client('iam')
            keys = keys_to_delete.get(account, [])
            for key in keys:
                user_and_user_keys = [
                    (user, user_keys)
                    for user, user_keys in users_keys[account].items()
                    if key in user_keys
                ]
                if not user_and_user_keys:
                    continue
                # unpack single item from sequence
                # since only a single user can have a given key
                [user_and_user_keys] = user_and_user_keys
                user = user_and_user_keys[0]
                user_keys = user_and_user_keys[1]
                key_type = self.determine_key_type(iam, user)
                key_status = self.get_user_key_status(iam, user, key)
                if key_type == 'unmanaged' and key_status == 'Active':
                    logging.info(['disable_key', account, user, key])

                    if not dry_run:
                        iam.update_access_key(UserName=user,
                                              AccessKeyId=key,
                                              Status='Inactive')
                elif key_type == 'user':
                    logging.info(['delete_key', account, user, key])

                    if not dry_run:
                        iam.delete_access_key(UserName=user,
                                              AccessKeyId=key)
                elif key_type == 'service_account':
                    # if key is disabled - delete it
                    # this will happen after terraform-resources ran,
                    # provisioned a new key, updated the output Secret,
                    # recycled the pods and disabled the key.
                    if key_status == 'Inactive':
                        logging.info(
                            ['delete_inactive_key', account, user, key])
                        if not dry_run:
                            iam.delete_access_key(UserName=user,
                                                  AccessKeyId=key)
                        continue

                    # if key is active and it is the only one -
                    # remove it from terraform state. terraform-resources
                    # will provision a new one.
                    # may be a race condition here. TODO: check it
                    if len(user_keys) == 1:
                        logging.info(['remove_from_state',
                                      account, user, key])
                        if not dry_run:
                            terraform.state_rm_access_key(
                                working_dirs, account, user)

                    # if user has 2 keys and we remove the key from
                    # terraform state, terraform-resources will not
                    # be able to provision a new key - limbo.
                    # this state should happen when terraform-resources
                    # is running, provisioned a new key,
                    # but did not disable the old key yet.
                    if len(user_keys) == 2:
                        # if true, this is a call made by terraform-resources
                        # itself. disable the key and proceed. the key will be
                        # deleted in a following iteration of aws-iam-keys.
                        if disable_service_account_keys:
                            logging.info(['disable_key', account, user, key])

                            if not dry_run:
                                iam.update_access_key(UserName=user,
                                                      AccessKeyId=key,
                                                      Status='Inactive')
                        else:
                            msg = \
                                'user {} has 2 keys, skipping to avoid error'
                            logging.error(msg.format(user))
                            error = True

        return error

    def get_users_keys(self):
        """Return account name -> user name -> list of access key ids."""
        users_keys = {}
        for account, s in self.sessions.items():
            iam = s.client('iam')
            users_keys[account] = {
                user: self.get_user_keys(iam, user)
                for user in self.users[account]
            }

        return users_keys

    def reset_password(self, account, user_name):
        """Delete the login profile of the user."""
        s = self.sessions[account]
        iam = s.client('iam')
        iam.delete_login_profile(UserName=user_name)

    def reset_mfa(self, account, user_name):
        """Deactivate and delete every MFA device of the user."""
        s = self.sessions[account]
        iam = s.client('iam')
        mfa_devices = iam.list_mfa_devices(UserName=user_name)['MFADevices']
        for d in mfa_devices:
            serial_number = d['SerialNumber']
            iam.deactivate_mfa_device(UserName=user_name,
                                      SerialNumber=serial_number)
            iam.delete_virtual_mfa_device(SerialNumber=serial_number)

    @staticmethod
    def get_user_keys(iam, user):
        """Return the access key ids of the user."""
        key_list = iam.list_access_keys(UserName=user)['AccessKeyMetadata']
        return [uk['AccessKeyId'] for uk in key_list]

    @staticmethod
    def get_user_key_status(iam, user, key):
        """Return the status of the given access key of the user
        (IndexError if the user has no such key)."""
        key_list = iam.list_access_keys(UserName=user)['AccessKeyMetadata']
        return [k['Status'] for k in key_list if k['AccessKeyId'] == key][0]

    def get_support_cases(self):
        """Return account name -> support cases for the accounts flagged
        with 'premiumSupport'. Failures are logged and the account is
        left out of the result."""
        all_support_cases = {}
        for account, s in self.sessions.items():
            if not self.accounts[account].get('premiumSupport'):
                continue
            try:
                support = s.client('support')
                support_cases = support.describe_cases(
                    includeResolvedCases=True,
                    includeCommunications=True)['cases']
                all_support_cases[account] = support_cases
            except Exception as e:
                msg = '[{}] error getting support cases. details: {}'
                logging.error(msg.format(account, str(e)))

        return all_support_cases

    def init_ecr_auth_tokens(self, accounts):
        """Fetch an ECR authorization token per account/region listed
        under the accounts' 'ecrs' key and store them in
        ``self.auth_tokens`` keyed by '<account>/<region>'."""
        accounts_with_ecr = [a for a in accounts if a.get('ecrs')]
        if not accounts_with_ecr:
            return

        auth_tokens = {}
        results = threaded.run(self.get_tf_secrets, accounts_with_ecr,
                               self.thread_pool_size)
        account_secrets = dict(results)
        for account in accounts_with_ecr:
            account_name = account['name']
            account_secret = account_secrets[account_name]
            access_key = account_secret['aws_access_key_id']
            secret_key = account_secret['aws_secret_access_key']

            ecrs = account['ecrs']
            for ecr in ecrs:
                region_name = ecr['region']
                session = Session(
                    aws_access_key_id=access_key,
                    aws_secret_access_key=secret_key,
                    region_name=region_name,
                )
                client = session.client('ecr')
                token = client.get_authorization_token()
                auth_tokens[f"{account_name}/{region_name}"] = token

        self.auth_tokens = auth_tokens

    @staticmethod
    def _get_account_assume_data(account: Account) -> Tuple[str, str, str]:
        """
        returns mandatory data to be able to assume a role with this account:
        (account_name, assume_role, assume_region)

        :raises KeyError: when one of the required keys is missing
        """
        required_keys = \
            ['name', 'assume_role', 'assume_region']
        ok = all(elem in account.keys() for elem in required_keys)
        if not ok:
            account_name = account.get('name')
            raise KeyError(
                '[{}] account is missing required keys'.format(account_name))
        return (account['name'], account['assume_role'],
                account['assume_region'])

    def _get_assume_role_session(self, account_name: str, assume_role: str,
                                 assume_region: str) -> Session:
        """
        Returns a session for a supplied role to assume:

        :param account_name: name of the AWS account
        :param assume_role: role to assume to get access
                            to the cluster's AWS account
        :param assume_region: region in which to operate
        :raises MissingARNError: when ``assume_role`` is empty
        """
        session = self.get_session(account_name)
        sts = session.client('sts')
        if not assume_role:
            raise MissingARNError(
                f'Could not find Role ARN {assume_role} on account '
                f'{account_name}. This is likely caused by a missing '
                'awsInfrastructureAccess section.')
        role_name = assume_role.split('/')[1]
        response = sts.assume_role(RoleArn=assume_role,
                                   RoleSessionName=role_name)
        credentials = response['Credentials']

        assumed_session = Session(
            aws_access_key_id=credentials['AccessKeyId'],
            aws_secret_access_key=credentials['SecretAccessKey'],
            aws_session_token=credentials['SessionToken'],
            region_name=assume_region)

        return assumed_session

    # pylint: disable=method-hidden
    def _get_assumed_role_client(self, account_name: str, assume_role: str,
                                 assume_region: str,
                                 client_type='ec2') -> EC2Client:
        """Return a client of ``client_type`` from an assumed-role
        session."""
        assumed_session = self._get_assume_role_session(
            account_name, assume_role, assume_region)
        return assumed_session.client(client_type)

    @staticmethod
    # pylint: disable=method-hidden
    def get_account_vpcs(ec2: EC2Client) -> List[VpcTypeDef]:
        """Return the VPCs visible to the EC2 client."""
        vpcs = ec2.describe_vpcs()
        return vpcs.get('Vpcs', [])

    # filters a list of aws resources according to tags
    @staticmethod
    def filter_on_tags(items: Iterable[Any],
                       tags: Optional[Mapping[str, str]] = None) \
            -> List[Any]:
        """Return the items carrying all the given tags.

        :param items: resources with an optional 'Tags' list of
                      ``{'Key': ..., 'Value': ...}`` entries
        :param tags: required tag key/value pairs; None or empty keeps
                     every item
        """
        wanted = tags or {}
        res = []
        for item in items:
            tags_dict = {t['Key']: t['Value'] for t in item.get('Tags', [])}
            if all(tags_dict.get(k) == values
                   for k, values in wanted.items()):
                res.append(item)
        return res

    @staticmethod
    # pylint: disable=method-hidden
    def get_vpc_route_tables(vpc_id: str, ec2: EC2Client) \
            -> List[RouteTableTypeDef]:
        """Return the route tables of the VPC."""
        rts = ec2.describe_route_tables(Filters=[{
            'Name': 'vpc-id',
            'Values': [vpc_id]
        }])
        return rts.get('RouteTables', [])

    @staticmethod
    # pylint: disable=method-hidden
    def get_vpc_subnets(vpc_id: str, ec2: EC2Client) \
            -> List[SubnetTypeDef]:
        """Return the subnets of the VPC."""
        subnets = ec2.describe_subnets(Filters=[{
            'Name': 'vpc-id',
            'Values': [vpc_id]
        }])
        return subnets.get('Subnets', [])

    def get_cluster_vpc_details(self, account, route_tables=False,
                                subnets=False):
        """
        Returns a cluster VPC details:
            - VPC ID
            - Route table IDs (optional)
            - Subnets list including Subnet ID and Subnet Availability zone
        :param account: a dictionary containing the following keys:
                        - name - name of the AWS account
                        - assume_role - role to assume to get access
                                        to the cluster's AWS account
                        - assume_region - region in which to operate
                        - assume_cidr - CIDR block of the cluster to
                                        use to find the matching VPC
        """
        assume_role_data = self._get_account_assume_data(account)
        assumed_ec2 = self._get_assumed_role_client(*assume_role_data)
        vpcs = self.get_account_vpcs(assumed_ec2)
        vpc_id = None
        for vpc in vpcs:
            if vpc['CidrBlock'] == account['assume_cidr']:
                vpc_id = vpc['VpcId']
                break

        route_table_ids = None
        subnets_id_az = None
        if vpc_id:
            if route_tables:
                vpc_route_tables = \
                    self.get_vpc_route_tables(vpc_id, assumed_ec2)
                route_table_ids = [
                    rt['RouteTableId'] for rt in vpc_route_tables
                ]
            if subnets:
                vpc_subnets = self.get_vpc_subnets(vpc_id, assumed_ec2)
                subnets_id_az = \
                    [
                        {
                            'id': s['SubnetId'],
                            'az': s['AvailabilityZone']
                        }
                        for s in vpc_subnets
                    ]

        return vpc_id, route_table_ids, subnets_id_az

    def get_cluster_nat_gateways_egress_ips(self, account):
        """Return the set of public IPs of the NAT gateways visible
        through the account's assumed role."""
        assumed_role_data = self._get_account_assume_data(account)
        assumed_ec2 = self._get_assumed_role_client(*assumed_role_data)
        nat_gateways = assumed_ec2.describe_nat_gateways()
        egress_ips = set()
        for nat in nat_gateways.get('NatGateways'):
            for address in nat['NatGatewayAddresses']:
                egress_ips.add(address['PublicIp'])

        return egress_ips

    def get_vpcs_details(self, account, tags=None, route_tables=False):
        """Return details of the account's VPCs across all regions.

        :param account: account dictionary (only 'name' is used)
        :param tags: optional tag key/value pairs the VPCs must carry
        :param route_tables: also collect the route table ids
        :return: list of dictionaries with the keys 'vpc_id', 'region',
                 'cidr_block' and 'route_table_ids'
        """
        results = []
        ec2 = self._account_ec2_client(account['name'])
        regions = [r['RegionName'] for r in ec2.describe_regions()['Regions']]
        for region_name in regions:
            ec2 = self._account_ec2_client(account['name'], region_name)
            vpcs = self.get_account_vpcs(ec2)
            vpcs = self.filter_on_tags(vpcs, tags)
            for vpc in vpcs:
                vpc_id = vpc['VpcId']
                cidr_block = vpc['CidrBlock']
                route_table_ids = None
                if route_tables:
                    vpc_route_tables = self.get_vpc_route_tables(vpc_id, ec2)
                    route_table_ids = [
                        rt['RouteTableId'] for rt in vpc_route_tables
                    ]
                item = {
                    'vpc_id': vpc_id,
                    'region': region_name,
                    'cidr_block': cidr_block,
                    'route_table_ids': route_table_ids,
                }
                results.append(item)

        return results

    def get_alb_network_interface_ips(self, account, service_name):
        """Return the private IPs of the in-use network interfaces of the
        load balancers tagged with the given kubernetes service name."""
        assumed_role_data = self._get_account_assume_data(account)
        ec2_client = self._get_assumed_role_client(*assumed_role_data, 'ec2')
        elb_client = self._get_assumed_role_client(*assumed_role_data, 'elb')
        service_tag = \
            {'Key': 'kubernetes.io/service-name', 'Value': service_name}
        nis = ec2_client.describe_network_interfaces()['NetworkInterfaces']
        lbs = elb_client.describe_load_balancers()['LoadBalancerDescriptions']
        result_ips = set()
        for lb in lbs:
            lb_name = lb['LoadBalancerName']
            tag_descriptions = elb_client.describe_tags(
                LoadBalancerNames=[lb_name])['TagDescriptions']
            for td in tag_descriptions:
                tags = td['Tags']
                if service_tag not in tags:
                    continue
                # found a load balancer we want to work with
                # find all network interfaces related to it
                for ni in nis:
                    if ni['Description'] != f"ELB {lb_name}":
                        continue
                    if ni['Status'] != 'in-use':
                        continue
                    # found a network interface!
                    ip = ni['PrivateIpAddress']
                    result_ips.add(ip)

        return result_ips

    @staticmethod
    # pylint: disable=method-hidden
    def get_vpc_default_sg_id(vpc_id: str, ec2: EC2Client) -> Optional[str]:
        """Return the id of the VPC's 'default' security group, or None
        when none is returned."""
        vpc_security_groups = ec2.describe_security_groups(
            Filters=[{
                'Name': 'vpc-id',
                'Values': [vpc_id]
            }, {
                'Name': 'group-name',
                'Values': ['default']
            }])
        # there is only one default
        for sg in vpc_security_groups.get('SecurityGroups', []):
            return sg['GroupId']

        return None

    @staticmethod
    # pylint: disable=method-hidden
    def get_transit_gateways(ec2: EC2Client) -> List[TransitGatewayTypeDef]:
        """Return the transit gateways visible to the EC2 client."""
        tgws = ec2.describe_transit_gateways()
        return tgws.get('TransitGateways', [])

    def get_tgw_default_route_table_id(self, ec2: EC2Client, tgw_id: str,
                                       tags: Mapping[str, str]) \
            -> Optional[str]:
        """Return the id of the TGW's default association route table,
        or None when default route table association is not enabled."""
        tgws = self.get_transit_gateways(ec2)
        tgws = self.filter_on_tags(tgws, tags)
        # we know the party TGW exists, so we can be
        # a little less cautious about getting it
        [tgw] = [t for t in tgws if t['TransitGatewayId'] == tgw_id]
        tgw_options = tgw['Options']
        tgw_has_route_table = \
            tgw_options['DefaultRouteTableAssociation'] == 'enable'
        # currently only adding routes
        # to the default route table
        if tgw_has_route_table:
            return tgw_options['AssociationDefaultRouteTableId']

        return None

    @staticmethod
    # pylint: disable=method-hidden
    def get_transit_gateway_vpc_attachments(tgw_id: str, ec2: EC2Client) \
            -> List[TransitGatewayVpcAttachmentTypeDef]:
        """Return the VPC attachments of the transit gateway."""
        atts = ec2.describe_transit_gateway_vpc_attachments(
            Filters=[{
                'Name': 'transit-gateway-id',
                'Values': [tgw_id]
            }])
        return atts.get('TransitGatewayVpcAttachments', [])

    def get_tgws_details(self, account, region_name, routes_cidr_block,
                         tags=None, route_tables=False,
                         security_groups=False):
        """Return details of the account's transit gateways in a region.

        :param account: account dictionary (only 'name' is used)
        :param region_name: region in which to look for TGWs
        :param routes_cidr_block: CIDR block to put in the collected
                                  route and rule items
        :param tags: optional tag key/value pairs the TGWs must carry
        :param route_tables: also collect route items under 'routes'
        :param security_groups: also collect rule items under 'rules'
        :return: list of dictionaries with the keys 'tgw_id', 'tgw_arn',
                 'region' and, when requested, 'routes' and 'rules'
        """
        # tags is optional: treat None as "no tag filtering"
        tags = tags or {}
        results = []
        ec2 = self._account_ec2_client(account['name'], region_name)
        tgws = ec2.describe_transit_gateways(Filters=[{
            'Name': f'tag:{k}',
            'Values': [v]
        } for k, v in tags.items()])
        for tgw in tgws.get('TransitGateways'):
            tgw_id = tgw['TransitGatewayId']
            tgw_arn = tgw['TransitGatewayArn']
            item = {
                'tgw_id': tgw_id,
                'tgw_arn': tgw_arn,
                'region': region_name,
            }

            if route_tables or security_groups:
                # both routes and rules are provisioned for resources
                # that are indirectly attached to the TGW.
                # routes are provisioned for route tables that belong
                # to TGWs which are peered to the TGW we are currently
                # handling.
                routes = []
                # rules are provisioned for security groups that belong
                # to VPCs which are attached to the TGW we are currently
                # handling AND to TGWs which are peered to it.
                rules = []
                # this will require to iterate over all reachable TGWs
                attachments = \
                    ec2.describe_transit_gateway_peering_attachments(
                        Filters=[
                            {'Name': 'transit-gateway-id',
                             'Values': [tgw_id]}
                        ]
                    )
                for a in attachments.get('TransitGatewayPeeringAttachments'):
                    tgw_attachment_id = a['TransitGatewayAttachmentId']
                    tgw_attachment_state = a['State']
                    if tgw_attachment_state != 'available':
                        continue
                    # we don't care who is who, so let's iterate over parties
                    attachment_parties = \
                        [a['RequesterTgwInfo'], a['AccepterTgwInfo']]
                    for party in attachment_parties:
                        party_tgw_id = party['TransitGatewayId']
                        party_region = party['Region']
                        party_ec2 = self._account_ec2_client(
                            account['name'], party_region)

                        # the TGW route table is automatically populated
                        # with the peered VPC cidr block.
                        # however, to achieve global routing across peered
                        # TGWs in different regions, we need to find all
                        # peering attachments in different regions and collect
                        # the data to later create a route in each peered TGW
                        # in a different region.
                        # this will require getting:
                        # - cluster cidr block
                        # - transit gateway attachment id
                        # - transit gateway route table id
                        # we will also pass some additional information:
                        # - transit gateway id
                        # - transit gateway region
                        if route_tables:
                            # don't act on yourself and
                            # routes are propagated within the same region
                            if party_tgw_id != tgw_id and \
                                    party_region != region_name:
                                party_tgw_route_table_id = \
                                    self.get_tgw_default_route_table_id(
                                        party_ec2, party_tgw_id, tags)
                                if party_tgw_route_table_id is not None:
                                    # that's it, we have all
                                    # the information we need
                                    route_item = {
                                        'cidr_block': routes_cidr_block,
                                        'tgw_attachment_id':
                                            tgw_attachment_id,
                                        'tgw_id': party_tgw_id,
                                        'tgw_route_table_id':
                                            party_tgw_route_table_id,
                                        'region': party_region
                                    }
                                    routes.append(route_item)

                        # once all the routing is in place, we need to allow
                        # connections in security groups.
                        # in TGW, we need to allow the rules in the VPCs
                        # associated to the TGWs that need to accept the
                        # traffic. we need to collect data about the vpc
                        # attachments for the TGWs, and for each VPC get
                        # the details of its default security group.
                        # this will require getting:
                        # - cluster cidr block
                        # - security group id
                        # we will also pass some additional information:
                        # - vpc id
                        # - vpc region
                        if security_groups:
                            vpc_attachments = \
                                self.get_transit_gateway_vpc_attachments(
                                    party_tgw_id, party_ec2)
                            for va in vpc_attachments:
                                vpc_attachment_vpc_id = va['VpcId']
                                vpc_attachment_state = va['State']
                                if vpc_attachment_state != 'available':
                                    continue
                                sg_id = self.get_vpc_default_sg_id(
                                    vpc_attachment_vpc_id, party_ec2)
                                if sg_id is not None:
                                    # that's it, we have all
                                    # the information we need
                                    rule_item = {
                                        'cidr_block': routes_cidr_block,
                                        'security_group_id': sg_id,
                                        'vpc_id': vpc_attachment_vpc_id,
                                        'region': party_region
                                    }
                                    rules.append(rule_item)

                if route_tables:
                    item['routes'] = routes
                if security_groups:
                    item['rules'] = rules

            results.append(item)

        return results

    def get_route53_zones(self):
        """
        Return the mapped Route53 DNS zones per account

        :return: account name -> list of route53 dns zones (empty list
                 when nothing was mapped for the account)
        :rtype: dict of (str, list)
        """
        return {
            account: self.resources.get(account, {}).get('route53', [])
            for account in self.sessions
        }

    def create_route53_zone(self, account_name, zone_name):
        """
        Create a Route53 DNS zone

        Errors are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_name: name of the zone to create
        :type account_name: str
        :type zone_name: str
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            caller_ref = f"{datetime.now()}"
            client.create_hosted_zone(
                Name=zone_name,
                CallerReference=caller_ref,
                HostedZoneConfig={
                    'Comment': 'Managed by App-Interface',
                },
            )
        except client.exceptions.InvalidDomainName:
            logging.error(f'[{account_name}] invalid domain name {zone_name}')
        except client.exceptions.HostedZoneAlreadyExists:
            logging.error(
                f'[{account_name}] hosted zone already exists: {zone_name}')
        except client.exceptions.TooManyHostedZones:
            logging.error(f'[{account_name}] too many hosted zones in account')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def delete_route53_zone(self, account_name, zone_id):
        """
        Delete a Route53 DNS zone

        Errors are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to delete
        :type account_name: str
        :type zone_id: str
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.delete_hosted_zone(Id=zone_id)
        except client.exceptions.NoSuchHostedZone:
            logging.error(f'[{account_name}] Error trying to delete '
                          f'unknown DNS zone {zone_id}')
        except client.exceptions.HostedZoneNotEmpty:
            logging.error(f'[{account_name}] Cannot delete DNS zone that '
                          f'is not empty {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def delete_route53_record(self, account_name, zone_id, awsdata):
        """
        Delete a Route53 DNS zone record

        Errors are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to operate on
        :param awsdata: aws record data of the record to delete
        :type account_name: str
        :type zone_id: str
        :type awsdata: dict
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.change_resource_record_sets(
                HostedZoneId=zone_id,
                ChangeBatch={
                    'Changes': [{
                        'Action': 'DELETE',
                        'ResourceRecordSet': awsdata,
                    }]
                })
        except client.exceptions.NoSuchHostedZone:
            logging.error(f'[{account_name}] Error trying to delete record: '
                          f'unknown DNS zone {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')

    def upsert_route53_record(self, account_name, zone_id, recordset):
        """
        Upsert a Route53 DNS zone record

        Errors are logged, not raised.

        :param account_name: the account name to operate on
        :param zone_id: aws zone id of the zone to operate on
        :param recordset: aws record data of the record to create or update
        :type account_name: str
        :type zone_id: str
        :type recordset: dict
        """
        session = self.get_session(account_name)
        client = session.client('route53')

        try:
            client.change_resource_record_sets(
                HostedZoneId=zone_id,
                ChangeBatch={
                    'Changes': [{
                        'Action': 'UPSERT',
                        'ResourceRecordSet': recordset,
                    }]
                })
        except client.exceptions.NoSuchHostedZone:
            # the message used to say "delete", copied from the method above
            logging.error(f'[{account_name}] Error trying to upsert record: '
                          f'unknown DNS zone {zone_id}')
        except Exception as e:
            logging.error(f'[{account_name}] unhandled exception: {e}')
def get_apps_data(date, month_delta=1):
    """Collect the per-app data used to build the reports.

    Every app returned by ``queries.get_apps()`` that has code components
    is enriched in place with the following keys:

    * ``post_deploy_jobs``: ``{cluster: {namespace: bool}}``, True when a
      saas file managing ``Job`` resources targets that namespace with an
      upstream starting with ``openshift-saas-deploy-``.
    * ``promotions``: ``{saas_repo_url: (total_builds, successes)}``.
    * ``merge_activity``: ``{code_repo_url: (total_builds, successes)}``.
    * ``container_vulnerabilities``:
      ``{cluster: {namespace: {severity: count}}}`` taken from the
      ``imagemanifestvuln_total`` samples served by dashdotdb.

    :param date: end of the reporting period; must support subtracting a
                 ``relativedelta`` and ``.replace(tzinfo=...)``
    :param month_delta: how many months of build history to look back
    :return: the list of apps (apps without code components are returned
             untouched)
    """
    apps = queries.get_apps()
    saas_files = queries.get_saas_files()
    jjb, _ = init_jjb()
    saas_jobs = jjb.get_all_jobs(job_types=['saas-deploy', 'promote-to-prod'])
    build_master_jobs = jjb.get_all_jobs(job_types=['build-master'])
    jenkins_map = jenkins_base.get_jenkins_map()
    time_limit = date - relativedelta(months=month_delta)
    timestamp_limit = \
        int(time_limit.replace(tzinfo=timezone.utc).timestamp())
    saas_build_history = \
        get_build_history(jenkins_map, saas_jobs, timestamp_limit)
    build_master_build_history = \
        get_build_history(jenkins_map, build_master_jobs, timestamp_limit)

    settings = queries.get_app_interface_settings()
    secret_reader = SecretReader(settings=settings)
    secret_content = secret_reader.read_all({'path': DASHDOTDB_SECRET})
    dashdotdb_url = secret_content['url']
    dashdotdb_user = secret_content['username']
    dashdotdb_pass = secret_content['password']
    # (connect, read) timeout so a stuck dashdotdb cannot hang the run
    metrics = requests.get(f'{dashdotdb_url}/api/v1/metrics',
                           auth=(dashdotdb_user, dashdotdb_pass),
                           timeout=(5, 120)).text
    namespaces = queries.get_namespaces()

    for app in apps:
        if not app['codeComponents']:
            continue

        app_name = app['name']

        logging.info(f"collecting post-deploy jobs "
                     f"information for {app_name}")
        post_deploy_jobs = {}
        for saas_file in saas_files:
            if saas_file['app']['name'] != app_name:
                continue
            resource_types = saas_file['managedResourceTypes']

            # Only jobs of these types are expected to have a
            # further post-deploy job
            # NOTE(review): as written this only skips saas files that
            # manage DeploymentConfig but not Deployment;
            # "'DeploymentConfig' in resource_types" may have been
            # intended - confirm before changing.
            if not any([
                    'Deployment' in resource_types,
                    'DeploymentConfig' not in resource_types
            ]):
                continue

            for resource_template in saas_file['resourceTemplates']:
                for target in resource_template['targets']:
                    cluster = target['namespace']['cluster']['name']
                    namespace = target['namespace']['name']
                    # setdefault keeps the namespaces already recorded
                    # for this cluster instead of resetting them
                    post_deploy_jobs.setdefault(cluster, {})[namespace] = False

        for saas_file in saas_files:
            if saas_file['app']['name'] != app_name:
                continue
            resource_types = saas_file['managedResourceTypes']
            if 'Job' not in resource_types:
                continue
            for resource_template in saas_file['resourceTemplates']:
                for target in resource_template['targets']:

                    cluster = target['namespace']['cluster']['name']
                    namespace = target['namespace']['name']

                    # This block skips the check if the cluster/namespace
                    # has no Deployment/DeploymentConfig job associated.
                    if cluster not in post_deploy_jobs:
                        continue
                    if namespace not in post_deploy_jobs[cluster]:
                        continue

                    # Post-deploy job must depend on a openshift-saas-deploy
                    # job
                    if target['upstream'] is None:
                        continue
                    if target['upstream'].startswith('openshift-saas-deploy-'):
                        post_deploy_jobs[cluster][namespace] = True

        app['post_deploy_jobs'] = post_deploy_jobs

        logging.info(f"collecting promotions for {app_name}")
        app['promotions'] = {}
        saas_repos = [
            c['url'] for c in app['codeComponents']
            if c['resource'] == 'saasrepo'
        ]
        for sr in saas_repos:
            sr_history = saas_build_history.get(sr)
            if not sr_history:
                continue
            successes = [h for h in sr_history if h == 'SUCCESS']
            app['promotions'][sr] = (len(sr_history), len(successes))

        logging.info(f"collecting merge activity for {app_name}")
        app['merge_activity'] = {}
        code_repos = [
            c['url'] for c in app['codeComponents']
            if c['resource'] == 'upstream'
        ]
        for cr in code_repos:
            cr_history = build_master_build_history.get(cr)
            if not cr_history:
                continue
            successes = [h for h in cr_history if h == 'SUCCESS']
            app['merge_activity'][cr] = (len(cr_history), len(successes))

        logging.info(f"collecting vulnerabilities information for {app_name}")
        app_namespaces = [
            namespace for namespace in namespaces
            if namespace['app']['name'] == app['name']
        ]
        app_metrics = {}
        for family in text_string_to_metric_families(metrics):
            for sample in family.samples:
                if sample.name != 'imagemanifestvuln_total':
                    continue
                for app_namespace in app_namespaces:
                    cluster = sample.labels['cluster']
                    if app_namespace['cluster']['name'] != cluster:
                        continue
                    namespace = sample.labels['namespace']
                    if app_namespace['name'] != namespace:
                        continue
                    severity = sample.labels['severity']
                    ns_metrics = app_metrics.setdefault(
                        cluster, {}).setdefault(namespace, {})
                    # the first sample seen for a severity wins
                    if severity not in ns_metrics:
                        ns_metrics[severity] = int(sample.value)
        app['container_vulnerabilities'] = app_metrics

    return apps
def __init__(self, configs, ssl_verify=True, settings=None):
    """Set up the instance from the given configs.

    :param configs: passed as-is to ``self.collect_configs``
    :param ssl_verify: stored as the string form of ``int(ssl_verify)``
                       (``True`` becomes ``'1'``, ``False`` becomes ``'0'``)
    :param settings: kept on the instance and handed to ``SecretReader``
    """
    self.settings = settings
    self.secret_reader = SecretReader(settings=settings)
    # the helper calls stay after the secret reader is created and
    # before the verify flag is stored, as in the original ordering
    self.collect_configs(configs)
    self.modify_logger()
    verify_flag = int(ssl_verify)
    self.python_https_verify = str(verify_flag)
class DashdotdbBase:
    """Shared plumbing for talking to dashdotdb.

    Reads the dashdotdb url and credentials from the secret at
    ``DASHDOTDB_SECRET`` and offers helpers to open/close an upload token,
    post data, query a Prometheus endpoint and read an automation token.
    """

    def __init__(self, dry_run, thread_pool_size, marker, scope):
        """
        :param dry_run: when true, no token is requested or closed
        :param thread_pool_size: stored on the instance for later use
        :param marker: prefix used in log messages
        :param scope: sent as the ``scope`` query parameter of the
                      token requests
        """
        self.dry_run = dry_run
        self.thread_pool_size = thread_pool_size
        self.settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=self.settings)
        self.secret_content = self.secret_reader.read_all(
            {'path': DASHDOTDB_SECRET})
        self.dashdotdb_url = self.secret_content['url']
        self.dashdotdb_user = self.secret_content['username']
        self.dashdotdb_pass = self.secret_content['password']
        self.logmarker = marker
        self.scope = scope

    def _get_token(self):
        """Request a token and store it in ``self.dashdotdb_token``.

        Does nothing in dry-run mode. When the server answers with an
        error status the error is logged and no token is stored.
        Always returns None.
        """
        if self.dry_run:
            return None

        params = {'scope': self.scope}
        endpoint = (f'{self.dashdotdb_url}/api/v1/'
                    f'token')
        response = requests.get(url=endpoint,
                                params=params,
                                auth=(self.dashdotdb_user,
                                      self.dashdotdb_pass),
                                timeout=(5, 120))
        try:
            response.raise_for_status()
        except requests.exceptions.RequestException as details:
            LOG.error('%s error retrieving token for %s data: %s',
                      self.logmarker, self.scope, details)
            return None
        # the token comes back as a quoted string
        self.dashdotdb_token = response.text.replace('"', '').strip()

    def _close_token(self):
        """Delete the token obtained by ``_get_token``.

        Does nothing in dry-run mode or when no token was stored (for
        example because ``_get_token`` failed), so cleanup code can call
        this unconditionally. Errors from the server are logged.
        """
        if self.dry_run:
            return None

        # the attribute only exists after a successful _get_token()
        token = getattr(self, 'dashdotdb_token', None)
        if token is None:
            return None

        params = {'scope': self.scope}
        endpoint = (f'{self.dashdotdb_url}/api/v1/'
                    f'token/{token}')
        response = requests.delete(url=endpoint,
                                   params=params,
                                   auth=(self.dashdotdb_user,
                                         self.dashdotdb_pass),
                                   timeout=(5, 120))
        try:
            response.raise_for_status()
        except requests.exceptions.RequestException as details:
            LOG.error('%s error closing token for %s data: %s',
                      self.logmarker, self.scope, details)

    def _do_post(self, endpoint, data, timeout=(5, 120)):
        """POST ``data`` as JSON to ``endpoint`` and return the response.

        The stored token is sent in the ``X-Auth`` header together with
        the basic-auth credentials.
        """
        return requests.post(url=endpoint,
                             json=data,
                             headers={"X-Auth": self.dashdotdb_token},
                             auth=(self.dashdotdb_user,
                                   self.dashdotdb_pass),
                             timeout=timeout)

    def _promget(self, url, params, token=None, ssl_verify=True,
                 uri='api/v1/query'):
        """GET ``uri`` joined onto ``url`` and return the decoded JSON.

        :param url: base url, joined with ``uri`` via ``urljoin``
        :param params: query parameters of the request
        :param token: optional bearer token for the Authorization header
        :param ssl_verify: forwarded as ``verify`` to ``requests.get``
        :param uri: path of the query endpoint
        :raises requests.exceptions.HTTPError: on an error status
        """
        url = urljoin(f'{url}', uri)
        LOG.debug('%s Fetching prom payload from %s?%s',
                  self.logmarker, url, params)
        headers = {
            "accept": "application/json",
        }
        if token:
            headers["Authorization"] = f"Bearer {token}"
        response = requests.get(url,
                                params=params,
                                headers=headers,
                                verify=ssl_verify,
                                timeout=(5, 120))
        response.raise_for_status()
        response = response.json()
        # TODO ensure len response == 1
        # return response['data']['result']
        return response

    def _get_automationtoken(self, tokenpath):
        """Read and return the secret found at ``tokenpath``."""
        autotoken_reader = SecretReader(settings=self.settings)
        token = autotoken_reader.read(tokenpath)
        return token
def get_apps_data(date, month_delta=1, thread_pool_size=10):
    """Collect the per-app data used to build the reports.

    Every app returned by ``queries.get_apps()`` that has code components
    is enriched in place with the following keys:

    * ``post_deploy_jobs``: ``{cluster: {namespace: bool}}``, True when a
      saas file managing ``Job`` resources targets that namespace with an
      upstream starting with ``openshift-saas-deploy-``.
    * ``promotions``: ``{saas_file_name: [entry, ...]}`` where each entry
      holds ``env``, ``cluster``, ``namespace`` plus the build history of
      the matching saas deploy job.
    * ``merge_activity``: ``{repo_url: [entry, ...]}`` where each entry
      holds ``branch`` plus the build history of the matching build job.
    * ``container_vulnerabilities``:
      ``{cluster: {namespace: {severity: count}}}``.
    * ``deployment_validations``:
      ``{cluster: {namespace: {validation: {status: count}}}}``.
    * ``service_slo``: ``{cluster: {namespace: {slo_name: {type: value}}}}``.

    :param date: end of the reporting period; must support subtracting a
                 ``relativedelta`` and ``.replace(tzinfo=...)``
    :param month_delta: how many months of build history to look back
    :param thread_pool_size: passed to ``get_build_history_pool``
    :return: the list of apps (apps without code components are returned
             untouched)
    """
    apps = queries.get_apps()
    saas_files = queries.get_saas_files()
    jjb, _ = init_jjb()
    jenkins_map = jenkins_base.get_jenkins_map()
    time_limit = date - relativedelta(months=month_delta)
    timestamp_limit = \
        int(time_limit.replace(tzinfo=timezone.utc).timestamp())

    settings = queries.get_app_interface_settings()
    secret_reader = SecretReader(settings=settings)
    secret_content = secret_reader.read_all({'path': DASHDOTDB_SECRET})
    dashdotdb_url = secret_content['url']
    dashdotdb_user = secret_content['username']
    dashdotdb_pass = secret_content['password']
    # (connect, read) timeout so a stuck dashdotdb cannot hang the run
    metrics = requests.get(f'{dashdotdb_url}/api/v1/metrics',
                           auth=(dashdotdb_user, dashdotdb_pass),
                           timeout=(5, 120)).text
    namespaces = queries.get_namespaces()

    build_jobs = jjb.get_all_jobs(job_types=['build'])
    # copy the per-instance lists too: a plain dict.copy() would share
    # them and the saas deploy jobs appended below would leak into
    # build_jobs
    jobs_to_get = {
        instance: list(jobs) for instance, jobs in build_jobs.items()
    }

    saas_deploy_jobs = []
    for saas_file in saas_files:
        saas_file_name = saas_file['name']
        for template in saas_file["resourceTemplates"]:
            for target in template["targets"]:
                target_namespace = target["namespace"]
                env_name = target_namespace["environment"]["name"]
                job = {
                    'env': env_name,
                    'app': target_namespace["app"]["name"],
                    'cluster': target_namespace['cluster']['name'],
                    'namespace': target_namespace['name'],
                    'name': get_openshift_saas_deploy_job_name(
                        saas_file_name, env_name, settings
                    ),
                    'saas_file_name': saas_file_name,
                    'instance': saas_file["instance"]["name"],
                }
                saas_deploy_jobs.append(job)
                jobs_to_get.setdefault(job['instance'], []).append(job)

    job_history = get_build_history_pool(
        jenkins_map,
        jobs_to_get,
        timestamp_limit,
        thread_pool_size
    )

    for app in apps:
        if not app['codeComponents']:
            continue

        app_name = app['name']

        logging.info(f"collecting post-deploy jobs "
                     f"information for {app_name}")
        post_deploy_jobs = {}
        for saas_file in saas_files:
            if saas_file['app']['name'] != app_name:
                continue
            resource_types = saas_file['managedResourceTypes']

            # Only jobs of these types are expected to have a
            # further post-deploy job
            # NOTE(review): as written this only skips saas files that
            # manage DeploymentConfig but not Deployment;
            # "'DeploymentConfig' in resource_types" may have been
            # intended - confirm before changing.
            if not any(['Deployment' in resource_types,
                        'DeploymentConfig' not in resource_types]):
                continue

            for resource_template in saas_file['resourceTemplates']:
                for target in resource_template['targets']:
                    cluster = target['namespace']['cluster']['name']
                    namespace = target['namespace']['name']
                    # setdefault keeps the namespaces already recorded
                    # for this cluster instead of resetting them
                    post_deploy_jobs.setdefault(cluster, {})[namespace] = False

        for saas_file in saas_files:
            if saas_file['app']['name'] != app_name:
                continue
            resource_types = saas_file['managedResourceTypes']
            if 'Job' not in resource_types:
                continue
            for resource_template in saas_file['resourceTemplates']:
                for target in resource_template['targets']:

                    cluster = target['namespace']['cluster']['name']
                    namespace = target['namespace']['name']

                    # This block skips the check if the cluster/namespace
                    # has no Deployment/DeploymentConfig job associated.
                    if cluster not in post_deploy_jobs:
                        continue
                    if namespace not in post_deploy_jobs[cluster]:
                        continue

                    # Post-deploy job must depend on a openshift-saas-deploy
                    # job
                    if target['upstream'] is None:
                        continue
                    if target['upstream'].startswith('openshift-saas-deploy-'):
                        post_deploy_jobs[cluster][namespace] = True

        app['post_deploy_jobs'] = post_deploy_jobs

        logging.info(f"collecting promotion history for {app_name}")
        app["promotions"] = {}
        for job in saas_deploy_jobs:
            if job['app'] != app_name:
                continue
            if job['name'] not in job_history:
                continue
            history = job_history[job["name"]]
            saas_file_name = job['saas_file_name']
            app["promotions"].setdefault(saas_file_name, []).append({
                "env": job["env"],
                "cluster": job["cluster"],
                "namespace": job["namespace"],
                **history
            })

        logging.info(f"collecting merge activity for {app_name}")
        app['merge_activity'] = {}
        code_repos = [c['url'] for c in app['codeComponents']
                      if c['resource'] == 'upstream']
        for jobs in build_jobs.values():
            for job in jobs:
                try:
                    repo_url = get_repo_url(job)
                except KeyError:
                    continue
                if repo_url not in code_repos:
                    continue
                if job['name'] not in job_history:
                    continue
                history = job_history[job['name']]
                app["merge_activity"].setdefault(repo_url, []).append({
                    "branch": job["branch"],
                    **history
                })

        logging.info(f"collecting dashdotdb information for {app_name}")
        app_namespaces = [
            namespace for namespace in namespaces
            if namespace['app']['name'] == app['name']
        ]
        vuln_mx = {}
        validt_mx = {}
        slo_mx = {}
        for family in text_string_to_metric_families(metrics):
            for sample in family.samples:
                if sample.name == 'imagemanifestvuln_total':
                    for app_namespace in app_namespaces:
                        cluster = sample.labels['cluster']
                        if app_namespace['cluster']['name'] != cluster:
                            continue
                        namespace = sample.labels['namespace']
                        if app_namespace['name'] != namespace:
                            continue
                        severity = sample.labels['severity']
                        ns_vulns = vuln_mx.setdefault(
                            cluster, {}).setdefault(namespace, {})
                        # the first sample seen for a severity wins
                        if severity not in ns_vulns:
                            ns_vulns[severity] = int(sample.value)

                if sample.name == 'deploymentvalidation_total':
                    for app_namespace in app_namespaces:
                        cluster = sample.labels['cluster']
                        if app_namespace['cluster']['name'] != cluster:
                            continue
                        namespace = sample.labels['namespace']
                        if app_namespace['name'] != namespace:
                            continue
                        validation = sample.labels['validation']
                        # dvo: fail == 1, pass == 0, py: true == 1, false == 0
                        # so: ({false|pass}, {true|fail})
                        status = ('Passed',
                                  'Failed')[int(sample.labels['status'])]
                        ns_validations = validt_mx.setdefault(
                            cluster, {}).setdefault(namespace, {})
                        # the latest sample for a status overwrites
                        # earlier ones
                        ns_validations.setdefault(
                            validation, {})[status] = int(sample.value)

                if sample.name == 'serviceslometrics':
                    for app_namespace in app_namespaces:
                        cluster = sample.labels['cluster']
                        if app_namespace['cluster']['name'] != cluster:
                            continue
                        namespace = sample.labels['namespace']
                        if app_namespace['name'] != namespace:
                            continue
                        slo_name = sample.labels['name']
                        ns_slos = slo_mx.setdefault(
                            cluster, {}).setdefault(namespace, {})
                        ns_slos.setdefault(slo_name, {})[
                            sample.labels['type']] = sample.value

        app['container_vulnerabilities'] = vuln_mx
        app['deployment_validations'] = validt_mx
        app['service_slo'] = slo_mx

    return apps
class SaasHerder(): """Wrapper around SaaS deployment actions.""" def __init__(self, saas_files, thread_pool_size, gitlab, integration, integration_version, settings, jenkins_map=None, accounts=None, validate=False): self.saas_files = saas_files if validate: self._validate_saas_files() if not self.valid: return self.thread_pool_size = thread_pool_size self.gitlab = gitlab self.integration = integration self.integration_version = integration_version self.settings = settings self.secret_reader = SecretReader(settings=settings) self.namespaces = self._collect_namespaces() self.jenkins_map = jenkins_map # each namespace is in fact a target, # so we can use it to calculate. divisor = len(self.namespaces) or 1 self.available_thread_pool_size = \ threaded.estimate_available_thread_pool_size( self.thread_pool_size, divisor) # if called by a single saas file,it may # specify that it manages resources exclusively. self.take_over = self._get_saas_file_attribute('takeover') self.compare = self._get_saas_file_attribute('compare') self.publish_job_logs = self._get_saas_file_attribute('publishJobLogs') if accounts: self._initiate_state(accounts) def _get_saas_file_attribute(self, attribute): return len(self.saas_files) == 1 and self.saas_files[0].get(attribute) def _validate_saas_files(self): self.valid = True saas_file_name_path_map = {} saas_file_promotion_publish_channels = [] for saas_file in self.saas_files: saas_file_name = saas_file['name'] saas_file_path = saas_file['path'] saas_file_name_path_map.setdefault(saas_file_name, []) saas_file_name_path_map[saas_file_name].append(saas_file_path) saas_file_owners = [u['org_username'] for r in saas_file['roles'] for u in r['users']] if not saas_file_owners: msg = 'saas file {} has no owners: {}' logging.error(msg.format(saas_file_name, saas_file_path)) self.valid = False for resource_template in saas_file['resourceTemplates']: resource_template_name = resource_template['name'] for target in resource_template['targets']: # 
promotion publish channels promotion = target.get('promotion') if promotion: publish = promotion.get('publish') if publish: saas_file_promotion_publish_channels.extend( publish) # validate target parameters target_parameters = target['parameters'] if not target_parameters: continue target_parameters = json.loads(target_parameters) target_namespace = target['namespace'] namespace_name = target_namespace['name'] cluster_name = target_namespace['cluster']['name'] environment = target_namespace['environment'] environment_name = environment['name'] environment_parameters = environment['parameters'] if not environment_parameters: continue environment_parameters = \ json.loads(environment_parameters) msg = \ f'[{saas_file_name}/{resource_template_name}] ' + \ 'parameter found in target ' + \ f'{cluster_name}/{namespace_name} ' + \ f'should be reused from env {environment_name}' for t_key, t_value in target_parameters.items(): if not isinstance(t_value, str): continue for e_key, e_value in environment_parameters.items(): if not isinstance(e_value, str): continue if '.' not in e_value: continue if e_value not in t_value: continue if t_key == e_key and t_value == e_value: details = \ f'consider removing {t_key}' else: replacement = t_value.replace( e_value, '${' + e_key + '}' ) details = \ f'target: \"{t_key}: {t_value}\". ' + \ f'env: \"{e_key}: {e_value}\". 
' + \ f'consider \"{t_key}: {replacement}\"' logging.warning(f'{msg}: {details}') # saas file name duplicates duplicates = {saas_file_name: saas_file_paths for saas_file_name, saas_file_paths in saas_file_name_path_map.items() if len(saas_file_paths) > 1} if duplicates: self.valid = False msg = 'saas file name {} is not unique: {}' for saas_file_name, saas_file_paths in duplicates.items(): logging.error(msg.format(saas_file_name, saas_file_paths)) # promotion publish channel duplicates duplicates = [p for p in saas_file_promotion_publish_channels if saas_file_promotion_publish_channels.count(p) > 1] if duplicates: self.valid = False msg = 'saas file promotion publish channel is not unique: {}' for duplicate in duplicates: logging.error(msg.format(duplicate)) def _collect_namespaces(self): # namespaces may appear more then once in the result namespaces = [] for saas_file in self.saas_files: managed_resource_types = saas_file['managedResourceTypes'] resource_templates = saas_file['resourceTemplates'] for rt in resource_templates: targets = rt['targets'] for target in targets: namespace = target['namespace'] if target.get('disable'): logging.debug( f"[{saas_file['name']}/{rt['name']}] target " + f"{namespace['cluster']['name']}/" + f"{namespace['name']} is disabled.") continue # managedResourceTypes is defined per saas_file # add it to each namespace in the current saas_file namespace['managedResourceTypes'] = managed_resource_types namespaces.append(namespace) return namespaces def _initiate_state(self, accounts): self.state = State( integration=self.integration, accounts=accounts, settings=self.settings ) @staticmethod def _collect_parameters(container): parameters = container.get('parameters') or {} if isinstance(parameters, str): parameters = json.loads(parameters) # adjust Python's True/False for k, v in parameters.items(): if v is True: parameters[k] = 'true' elif v is False: parameters[k] = 'false' elif any([isinstance(v, t) for t in [dict, list, tuple]]): 
parameters[k] = json.dumps(v) return parameters @staticmethod def _get_file_contents_github(repo, path, commit_sha): try: f = repo.get_contents(path, commit_sha) return f.decoded_content except GithubException as e: # slightly copied with love from # https://github.com/PyGithub/PyGithub/issues/661 errors = e.data['errors'] # example errors dict that we are looking for # { # 'message': '<text>', # 'errors': [{ # 'resource': 'Blob', # 'field': 'data', # 'code': 'too_large' # }], # 'documentation_url': '<url>' # } for error in errors: if error['code'] == 'too_large': # get large files tree = repo.get_git_tree( commit_sha, recursive='/' in path).tree for x in tree: if x.path != path.lstrip('/'): continue blob = repo.get_git_blob(x.sha) return base64.b64decode(blob.content).decode("utf8") raise e @retry() def _get_file_contents(self, options): url = options['url'] path = options['path'] ref = options['ref'] github = options['github'] html_url = f"{url}/blob/{ref}{path}" commit_sha = self._get_commit_sha(options) content = None if 'github' in url: repo_name = url.rstrip("/").replace('https://github.com/', '') repo = github.get_repo(repo_name) content = self._get_file_contents_github(repo, path, commit_sha) elif 'gitlab' in url: if not self.gitlab: raise Exception('gitlab is not initialized') project = self.gitlab.get_project(url) f = project.files.get(file_path=path.lstrip('/'), ref=commit_sha) content = f.decode() return yaml.safe_load(content), html_url, commit_sha @retry() def _get_directory_contents(self, options): url = options['url'] path = options['path'] ref = options['ref'] github = options['github'] html_url = f"{url}/tree/{ref}{path}" commit_sha = self._get_commit_sha(options) resources = [] if 'github' in url: repo_name = url.rstrip("/").replace('https://github.com/', '') repo = github.get_repo(repo_name) for f in repo.get_contents(path, commit_sha): file_path = os.path.join(path, f.name) file_contents_decoded = \ self._get_file_contents_github( repo, 
file_path, commit_sha) resource = yaml.safe_load(file_contents_decoded) resources.append(resource) elif 'gitlab' in url: if not self.gitlab: raise Exception('gitlab is not initialized') project = self.gitlab.get_project(url) for f in project.repository_tree(path=path.lstrip('/'), ref=commit_sha, all=True): file_contents = \ project.files.get(file_path=f['path'], ref=commit_sha) resource = yaml.safe_load(file_contents.decode()) resources.append(resource) return resources, html_url, commit_sha @retry() def _get_commit_sha(self, options): url = options['url'] ref = options['ref'] github = options['github'] hash_length = options.get('hash_length') commit_sha = '' if 'github' in url: repo_name = url.rstrip("/").replace('https://github.com/', '') repo = github.get_repo(repo_name) commit = repo.get_commit(sha=ref) commit_sha = commit.sha elif 'gitlab' in url: if not self.gitlab: raise Exception('gitlab is not initialized') project = self.gitlab.get_project(url) commits = project.commits.list(ref_name=ref) commit_sha = commits[0].id if hash_length: return commit_sha[:hash_length] return commit_sha @staticmethod def _get_cluster_and_namespace(target): cluster = target['namespace']['cluster']['name'] namespace = target['namespace']['name'] return cluster, namespace @staticmethod def _additional_resource_process(resources, html_url): for resource in resources: # add a definition annotation to each PrometheusRule rule if resource['kind'] == 'PrometheusRule': try: groups = resource['spec']['groups'] for group in groups: rules = group['rules'] for rule in rules: annotations = rule.get('annotations') if not annotations: continue rule['annotations']['html_url'] = html_url except Exception: logging.warning( 'could not add html_url annotation to' + resource['name']) def _process_template(self, options): saas_file_name = options['saas_file_name'] resource_template_name = options['resource_template_name'] url = options['url'] path = options['path'] provider = options['provider'] 
target = options['target'] github = options['github'] target_ref = target['ref'] target_promotion = target.get('promotion') or {} resources = None html_url = None commit_sha = None if provider == 'openshift-template': hash_length = options['hash_length'] parameters = options['parameters'] environment = target['namespace']['environment'] environment_parameters = self._collect_parameters(environment) target_parameters = self._collect_parameters(target) consolidated_parameters = {} consolidated_parameters.update(environment_parameters) consolidated_parameters.update(parameters) consolidated_parameters.update(target_parameters) for replace_key, replace_value in consolidated_parameters.items(): if not isinstance(replace_value, str): continue replace_pattern = '${' + replace_key + '}' for k, v in consolidated_parameters.items(): if not isinstance(v, str): continue if replace_pattern in v: consolidated_parameters[k] = \ v.replace(replace_pattern, replace_value) get_file_contents_options = { 'url': url, 'path': path, 'ref': target_ref, 'github': github } try: template, html_url, commit_sha = \ self._get_file_contents(get_file_contents_options) except Exception as e: logging.error( f"[{url}/{path}:{target_ref}] " + f"error fetching template: {str(e)}") return None, None if "IMAGE_TAG" not in consolidated_parameters: template_parameters = template.get('parameters') if template_parameters is not None: for template_parameter in template_parameters: if template_parameter['name'] == 'IMAGE_TAG': # add IMAGE_TAG only if it is required image_tag = commit_sha[:hash_length] consolidated_parameters['IMAGE_TAG'] = image_tag oc = OC('server', 'token', local=True) try: resources = oc.process(template, consolidated_parameters) except StatusCodeError as e: logging.error( f"[{saas_file_name}/{resource_template_name}] " + f"{html_url}: error processing template: {str(e)}") elif provider == 'directory': get_directory_contents_options = { 'url': url, 'path': path, 'ref': target_ref, 'github': 
github } try: resources, html_url, commit_sha = \ self._get_directory_contents( get_directory_contents_options) except Exception as e: logging.error( f"[{url}/{path}:{target_ref}] " + f"error fetching directory: {str(e)}") return None, None else: logging.error( f"[{saas_file_name}/{resource_template_name}] " + f"unknown provider: {provider}") target_promotion['commit_sha'] = commit_sha return resources, html_url, target_promotion @staticmethod def _collect_images(resource): images = set() # resources with pod templates try: template = resource["spec"]["template"] for c in template["spec"]["containers"]: images.add(c["image"]) except KeyError: pass # init containers try: template = resource["spec"]["template"] for c in template["spec"]["initContainers"]: images.add(c["image"]) except KeyError: pass # CronJob try: template = resource["spec"]["jobTemplate"]["spec"]["template"] for c in template["spec"]["containers"]: images.add(c["image"]) except KeyError: pass # CatalogSource templates try: images.add(resource["spec"]["image"]) except KeyError: pass return images @staticmethod def _check_image(image, image_patterns, image_auth, error_prefix): error = False if image_patterns and \ not any(image.startswith(p) for p in image_patterns): error = True logging.error( f"{error_prefix} Image is not in imagePatterns: {image}") try: valid = Image(image, **image_auth) if not valid: error = True logging.error( f"{error_prefix} Image does not exist: {image}") except Exception as e: error = True logging.error(f"{error_prefix} Image is invalid: {image}. 
" + f"details: {str(e)}") return error def _check_images(self, options): saas_file_name = options['saas_file_name'] resource_template_name = options['resource_template_name'] html_url = options['html_url'] resources = options['resources'] image_auth = options['image_auth'] image_patterns = options['image_patterns'] error_prefix = \ f"[{saas_file_name}/{resource_template_name}] {html_url}:" images_list = threaded.run(self._collect_images, resources, self.available_thread_pool_size) images = {item for sublist in images_list for item in sublist} if not images: return False # no errors errors = threaded.run(self._check_image, images, self.available_thread_pool_size, image_patterns=image_patterns, image_auth=image_auth, error_prefix=error_prefix) error = True in errors return error def _initiate_github(self, saas_file): auth = saas_file.get('authentication') or {} auth_code = auth.get('code') or {} if auth_code: token = self.secret_reader.read(auth_code) else: # use the app-sre token by default default_org_name = 'app-sre' config = get_config(desired_org_name=default_org_name) token = config['github'][default_org_name]['token'] base_url = os.environ.get('GITHUB_API', 'https://api.github.com') return Github(token, base_url=base_url) def _initiate_image_auth(self, saas_file): """ This function initiates a dict required for image authentication. This dict will be used as kwargs for sertoolbox's Image. The image authentication secret specified in the saas file must contain the 'user' and 'token' keys, and may optionally contain a 'url' key specifying the image registry url to be passed to check if an image should be checked using these credentials. 
The function returns the keys extracted from the secret in the structure expected by sretoolbox's Image: 'user' --> 'username' 'token' --> 'password' 'url' --> 'auth_server' (optional) """ auth = saas_file.get('authentication') if not auth: return {} auth_image_secret = auth.get('image') if not auth_image_secret: return {} creds = self.secret_reader.read_all(auth_image_secret) required_keys = ['user', 'token'] ok = all(k in creds.keys() for k in required_keys) if not ok: logging.warning( "the specified image authentication secret " + f"found in path {auth_image_secret['path']} " + f"does not contain all required keys: {required_keys}" ) return {} image_auth = { 'username': creds['user'], 'password': creds['token'] } url = creds.get('url') if url: image_auth['auth_server']: url return image_auth def populate_desired_state(self, ri): results = threaded.run(self.init_populate_desired_state_specs, self.saas_files, self.thread_pool_size) desired_state_specs = \ [item for sublist in results for item in sublist] promotions = threaded.run(self.populate_desired_state_saas_file, desired_state_specs, self.thread_pool_size, ri=ri) self.promotions = promotions def init_populate_desired_state_specs(self, saas_file): specs = [] saas_file_name = saas_file['name'] github = self._initiate_github(saas_file) image_auth = self._initiate_image_auth(saas_file) instance_name = saas_file['instance']['name'] managed_resource_types = saas_file['managedResourceTypes'] image_patterns = saas_file['imagePatterns'] resource_templates = saas_file['resourceTemplates'] saas_file_parameters = self._collect_parameters(saas_file) # iterate over resource templates (multiple per saas_file) for rt in resource_templates: rt_name = rt['name'] url = rt['url'] path = rt['path'] provider = rt.get('provider') or 'openshift-template' hash_length = rt.get('hash_length') or self.settings['hashLength'] parameters = self._collect_parameters(rt) consolidated_parameters = {} 
consolidated_parameters.update(saas_file_parameters) consolidated_parameters.update(parameters) # iterate over targets (each target is a namespace) for target in rt['targets']: if target.get('disable'): # a warning is logged during SaasHerder initiation continue cluster, namespace = \ self._get_cluster_and_namespace(target) process_template_options = { 'saas_file_name': saas_file_name, 'resource_template_name': rt_name, 'url': url, 'path': path, 'provider': provider, 'hash_length': hash_length, 'target': target, 'parameters': consolidated_parameters, 'github': github } check_images_options_base = { 'saas_file_name': saas_file_name, 'resource_template_name': rt_name, 'image_auth': image_auth, 'image_patterns': image_patterns } spec = { 'saas_file_name': saas_file_name, 'cluster': cluster, 'namespace': namespace, 'managed_resource_types': managed_resource_types, 'process_template_options': process_template_options, 'check_images_options_base': check_images_options_base, 'instance_name': instance_name, 'upstream': target.get('upstream') } specs.append(spec) return specs def populate_desired_state_saas_file(self, spec, ri): saas_file_name = spec['saas_file_name'] cluster = spec['cluster'] namespace = spec['namespace'] managed_resource_types = spec['managed_resource_types'] process_template_options = spec['process_template_options'] check_images_options_base = spec['check_images_options_base'] instance_name = spec['instance_name'] upstream = spec['upstream'] resources, html_url, promotion = \ self._process_template(process_template_options) if resources is None: ri.register_error() return # filter resources resources = [resource for resource in resources if isinstance(resource, dict) and resource['kind'] in managed_resource_types] # additional processing of resources self._additional_resource_process(resources, html_url) # check images skip_check_images = upstream and self.jenkins_map and \ self.jenkins_map[instance_name].is_job_running(upstream) if skip_check_images: 
logging.warning( "skipping check_image since " + f"upstream job {upstream} is running" ) else: check_images_options = { 'html_url': html_url, 'resources': resources } check_images_options.update(check_images_options_base) image_error = self._check_images(check_images_options) if image_error: ri.register_error() return # add desired resources for resource in resources: resource_kind = resource['kind'] resource_name = resource['metadata']['name'] oc_resource = OR( resource, self.integration, self.integration_version, caller_name=saas_file_name, error_details=html_url) ri.add_desired( cluster, namespace, resource_kind, resource_name, oc_resource ) return promotion def get_moving_commits_diff(self, dry_run): results = threaded.run(self.get_moving_commits_diff_saas_file, self.saas_files, self.thread_pool_size, dry_run=dry_run) return [item for sublist in results for item in sublist] def get_moving_commits_diff_saas_file(self, saas_file, dry_run): saas_file_name = saas_file['name'] instace_name = saas_file['instance']['name'] github = self._initiate_github(saas_file) trigger_specs = [] for rt in saas_file['resourceTemplates']: rt_name = rt['name'] url = rt['url'] for target in rt['targets']: # don't trigger if there is a linked upstream job if target.get('upstream'): continue ref = target['ref'] get_commit_sha_options = { 'url': url, 'ref': ref, 'github': github } desired_commit_sha = \ self._get_commit_sha(get_commit_sha_options) # don't trigger on refs which are commit shas if ref == desired_commit_sha: continue namespace = target['namespace'] cluster_name = namespace['cluster']['name'] namespace_name = namespace['name'] env_name = namespace['environment']['name'] key = f"{saas_file_name}/{rt_name}/{cluster_name}/" + \ f"{namespace_name}/{env_name}/{ref}" current_commit_sha = self.state.get(key, None) # skip if there is no change in commit sha if current_commit_sha == desired_commit_sha: continue # don't trigger if this is the first time # this target is being 
deployed. # that will be taken care of by # openshift-saas-deploy-trigger-configs if current_commit_sha is None: # store the value to take over from now on if not dry_run: self.state.add(key, value=desired_commit_sha) continue # we finally found something we want to trigger on! job_spec = { 'saas_file_name': saas_file_name, 'env_name': env_name, 'instance_name': instace_name, 'rt_name': rt_name, 'cluster_name': cluster_name, 'namespace_name': namespace_name, 'ref': ref, 'commit_sha': desired_commit_sha } trigger_specs.append(job_spec) return trigger_specs def update_moving_commit(self, job_spec): saas_file_name = job_spec['saas_file_name'] env_name = job_spec['env_name'] rt_name = job_spec['rt_name'] cluster_name = job_spec['cluster_name'] namespace_name = job_spec['namespace_name'] ref = job_spec['ref'] commit_sha = job_spec['commit_sha'] key = f"{saas_file_name}/{rt_name}/{cluster_name}/" + \ f"{namespace_name}/{env_name}/{ref}" self.state.add(key, value=commit_sha, force=True) def get_configs_diff(self): results = threaded.run(self.get_configs_diff_saas_file, self.saas_files, self.thread_pool_size) return [item for sublist in results for item in sublist] def get_configs_diff_saas_file(self, saas_file): saas_file_name = saas_file['name'] saas_file_parameters = saas_file.get('parameters') saas_file_managed_resource_types = saas_file['managedResourceTypes'] instace_name = saas_file['instance']['name'] trigger_specs = [] for rt in saas_file['resourceTemplates']: rt_name = rt['name'] url = rt['url'] path = rt['path'] rt_parameters = rt.get('parameters') for desired_target_config in rt['targets']: namespace = desired_target_config['namespace'] cluster_name = namespace['cluster']['name'] namespace_name = namespace['name'] env_name = namespace['environment']['name'] desired_target_config['namespace'] = \ self.sanitize_namespace(namespace) # add parent parameters to target config desired_target_config['saas_file_parameters'] = \ saas_file_parameters # add managed 
resource types to target config desired_target_config['saas_file_managed_resource_types'] = \ saas_file_managed_resource_types desired_target_config['url'] = url desired_target_config['path'] = path desired_target_config['rt_parameters'] = rt_parameters # get current target config from state key = f"{saas_file_name}/{rt_name}/{cluster_name}/" + \ f"{namespace_name}/{env_name}" current_target_config = self.state.get(key, None) # skip if there is no change in target configuration if current_target_config == desired_target_config: continue job_spec = { 'saas_file_name': saas_file_name, 'env_name': env_name, 'instance_name': instace_name, 'rt_name': rt_name, 'cluster_name': cluster_name, 'namespace_name': namespace_name, 'target_config': desired_target_config } trigger_specs.append(job_spec) return trigger_specs @staticmethod def sanitize_namespace(namespace): """Only keep fields that should trigger a new job.""" new_job_fields = { 'namespace': ['name', 'cluster', 'app'], 'cluster': ['name', 'serverUrl'], 'app': ['name'] } namespace = {k: v for k, v in namespace.items() if k in new_job_fields['namespace']} cluster = namespace['cluster'] namespace['cluster'] = {k: v for k, v in cluster.items() if k in new_job_fields['cluster']} app = namespace['app'] namespace['app'] = {k: v for k, v in app.items() if k in new_job_fields['app']} return namespace def update_config(self, job_spec): saas_file_name = job_spec['saas_file_name'] env_name = job_spec['env_name'] rt_name = job_spec['rt_name'] cluster_name = job_spec['cluster_name'] namespace_name = job_spec['namespace_name'] target_config = job_spec['target_config'] key = f"{saas_file_name}/{rt_name}/{cluster_name}/" + \ f"{namespace_name}/{env_name}" self.state.add(key, value=target_config, force=True) def validate_promotions(self): """ If there were promotion sections in the participating saas files validate that the conditions are met. 
""" for item in self.promotions: # validate that the commit sha being promoted # was succesfully published to the subscribed channel(s) commit_sha = item['commit_sha'] subscribe = item.get('subscribe') if subscribe: for channel in subscribe: state_key = f"promotions/{channel}/{commit_sha}" value = self.state.get(state_key, None) success = value.get('success') if not success: logging.error( f'Commit {commit_sha} was not ' + f'published with success to channel {channel}' ) return False return True def publish_promotions(self, success, saas_files, mr_cli): """ If there were promotion sections in the participating saas files publish the results for future promotion validations. """ subscribe_saas_file_path_map = \ self._get_subscribe_saas_file_path_map(saas_files, auto_only=True) trigger_promotion = False for item in self.promotions: commit_sha = item['commit_sha'] publish = item.get('publish') if publish: all_subscribed_saas_file_paths = set() for channel in publish: # publish to state to pass promotion gate state_key = f"promotions/{channel}/{commit_sha}" value = { 'success': success } self.state.add(state_key, value, force=True) logging.info( f'Commit {commit_sha} was published ' + f'with success {success} to channel {channel}' ) # collect data to trigger promotion subscribed_saas_file_paths = \ subscribe_saas_file_path_map.get(channel) if subscribed_saas_file_paths: all_subscribed_saas_file_paths.update( subscribed_saas_file_paths) item['saas_file_paths'] = list(all_subscribed_saas_file_paths) if all_subscribed_saas_file_paths: trigger_promotion = True if trigger_promotion: mr = AutoPromoter(self.promotions) mr.submit(cli=mr_cli) @staticmethod def _get_subscribe_saas_file_path_map(saas_files, auto_only=False): """ Returns a dict with subscribe channels as keys and a list of paths of saas files containing these channels. 
""" subscribe_saas_file_path_map = {} for saas_file in saas_files: saas_file_path = 'data' + saas_file['path'] for rt in saas_file['resourceTemplates']: for target in rt['targets']: target_promotion = target.get('promotion') if not target_promotion: continue target_auto = target_promotion.get('auto') if auto_only and not target_auto: continue subscribe = target_promotion.get('subscribe') if not subscribe: continue for channel in subscribe: subscribe_saas_file_path_map.setdefault( channel, set()) subscribe_saas_file_path_map[channel].add( saas_file_path) return subscribe_saas_file_path_map
class EcrMirror:
    """Copy the tags of a configured mirror image into an AWS ECR repository.

    The constructor resolves the ECR repository URI and the credentials for
    both sides; ``run`` then copies every tag that exists on the mirror
    source but is missing from the ECR repository.
    """

    def __init__(self, instance, dry_run):
        """Resolve the ECR repository and credentials for ``instance``.

        :param instance: dict read for the keys 'identifier', 'account',
            optional 'region', and 'mirror' ('url', 'pullCredentials')
        :param dry_run: stored on the instance and handed to ``Skopeo``
        """
        self.dry_run = dry_run
        self.instance = instance
        self.settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=self.settings)
        self.skopeo_cli = Skopeo(dry_run)
        self.error = False

        identifier = instance['identifier']
        account = instance['account']
        region = instance.get('region')

        self.aws_cli = AWSApi(thread_pool_size=1,
                              accounts=[self._get_aws_account_info(account)],
                              settings=self.settings,
                              init_ecr_auth_tokens=True)
        self.aws_cli.map_ecr_resources()

        self.ecr_uri = self._get_image_uri(
            account=account,
            repository=identifier,
        )
        if self.ecr_uri is None:
            # remember the failure; run() becomes a no-op
            self.error = True
            LOG.error(f"Could not find the ECR repository {identifier}")

        self.ecr_username, self.ecr_password = self._get_ecr_creds(
            account=account,
            region=region,
        )
        self.ecr_auth = f'{self.ecr_username}:{self.ecr_password}'

        # pull credentials for the mirror source are optional
        self.image_username = None
        self.image_password = None
        self.image_auth = None
        pull_secret = self.instance['mirror']['pullCredentials']
        if pull_secret is not None:
            raw_data = self.secret_reader.read_all(pull_secret)
            self.image_username = raw_data["user"]
            self.image_password = raw_data["token"]
            self.image_auth = f'{self.image_username}:{self.image_password}'

    def run(self):
        """Copy every tag missing from the ECR repository.

        Does nothing when the constructor flagged an error. A failed copy
        is logged and the remaining tags are still processed.
        """
        if self.error:
            return

        ecr_mirror = Image(self.ecr_uri, username=self.ecr_username,
                           password=self.ecr_password)

        image = Image(self.instance['mirror']['url'],
                      username=self.image_username,
                      password=self.image_password)

        LOG.debug('[checking %s -> %s]', image, ecr_mirror)
        for tag in image:
            if tag in ecr_mirror:
                continue
            try:
                self.skopeo_cli.copy(src_image=image[tag],
                                     src_creds=self.image_auth,
                                     dst_image=ecr_mirror[tag],
                                     dest_creds=self.ecr_auth)
            except SkopeoCmdError as details:
                LOG.error('[%s]', details)

    def _get_ecr_creds(self, account, region):
        """Return ``('AWS', password)`` decoded from the ECR auth token.

        :param account: account name used to look up the auth token
        :param region: region name; when None the account's
            'resourcesDefaultRegion' is used
        """
        if region is None:
            region = self.aws_cli.accounts[account]['resourcesDefaultRegion']
        auth_token = f'{account}/{region}'
        data = self.aws_cli.auth_tokens[auth_token]
        auth_data = data['authorizationData'][0]
        token = auth_data['authorizationToken']
        # The decoded token is '<user>:<password>'; split only once so a
        # password that itself contains ':' is not truncated.
        password = base64.b64decode(token).decode('utf-8').split(':', 1)[1]
        return 'AWS', password

    def _get_image_uri(self, account, repository):
        """Return the URI of ``repository`` in ``account``, or None."""
        for repo in self.aws_cli.resources[account]['ecr']:
            if repo['repositoryName'] == repository:
                return repo['repositoryUri']
        return None

    @staticmethod
    def _get_aws_account_info(account):
        """Return the AWS account entry named ``account``, or None."""
        for account_info in queries.get_aws_accounts():
            if 'name' not in account_info:
                continue
            if account_info['name'] != account:
                continue
            return account_info
        return None
def fetch_current_state(unleash_instance):
    """Return the feature toggles reported by an Unleash instance.

    :param unleash_instance: dict providing 'url' (the API is reached at
        '<url>/api') and 'token' (secret reference for the admin token)
    :return: whatever ``get_feature_toggles`` returns for that API
    """
    endpoint = f"{unleash_instance['url']}/api"
    settings = queries.get_app_interface_settings()
    reader = SecretReader(settings=settings)
    token = reader.read(unleash_instance['token'])
    return get_feature_toggles(endpoint, token)
class QuayMirror:
    """Sync mirrored images into the GCR repositories returned by GraphQL.

    Repositories and their mirror sources come from ``GCR_REPOS_QUERY``;
    per-project push credentials come from ``GCR_PROJECT_CATALOG_QUERY``.
    """

    GCR_PROJECT_CATALOG_QUERY = """
    {
      projects: gcp_projects_v1 {
        name
        pushCredentials {
          path
          field
        }
      }
    }
    """

    GCR_REPOS_QUERY = """
    {
      apps: apps_v1 {
        gcrRepos {
          project {
            name
          }
          items {
            name
            mirror {
              url
              pullCredentials {
                path
                field
              }
              tags
              tagsExclude
            }
          }
        }
      }
    }
    """

    def __init__(self, dry_run=False):
        """Set up the API clients and load the push credentials.

        :param dry_run: stored on the instance and handed to ``Skopeo``;
            it also disables the deep comparison in ``process_sync_tasks``
        """
        self.dry_run = dry_run
        self.gqlapi = gql.get_api()
        settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=settings)
        self.skopeo_cli = Skopeo(dry_run)
        self.push_creds = self._get_push_creds()

    def run(self):
        """Copy every out-of-sync tag; copy failures are logged, not raised."""
        sync_tasks = self.process_sync_tasks()
        for org, data in sync_tasks.items():
            for item in data:
                try:
                    self.skopeo_cli.copy(src_image=item['mirror_url'],
                                         src_creds=item['mirror_creds'],
                                         dst_image=item['image_url'],
                                         dest_creds=self.push_creds[org])
                except SkopeoCmdError as details:
                    _LOG.error('[%s]', details)

    def process_repos_query(self):
        """Return ``{project: [{'name', 'mirror', 'server_url'}, ...]}``.

        Only repository items that define a mirror are included.
        """
        result = self.gqlapi.query(self.GCR_REPOS_QUERY)

        summary = defaultdict(list)

        for app in result['apps']:
            gcr_repos = app.get('gcrRepos')

            if gcr_repos is None:
                continue

            for gcr_repo in gcr_repos:
                project = gcr_repo['project']['name']
                server_url = gcr_repo['project'].get('serverUrl') or 'gcr.io'
                for item in gcr_repo['items']:
                    if item['mirror'] is None:
                        continue

                    summary[project].append({
                        'name': item["name"],
                        'mirror': item['mirror'],
                        'server_url': server_url
                    })

        return summary

    @staticmethod
    def sync_tag(tags, tags_exclude, candidate):
        """Decide whether the tag ``candidate`` should be synced.

        :param tags: regex patterns to include, or None
        :param tags_exclude: regex patterns to exclude, or None
        :param candidate: tag name, matched with ``re.match``
        :return: True when the tag must be synced
        """
        if tags is not None:
            # When tags is defined, we don't look at tags_exclude
            return any(re.match(tag, candidate) for tag in tags)

        if tags_exclude is not None:
            return not any(re.match(tag_exclude, candidate)
                           for tag_exclude in tags_exclude)

        # Both tags and tags_exclude are None, so
        # tag must be synced
        return True

    def process_sync_tasks(self):
        """Return ``{project: [task, ...]}`` for tags that need copying.

        Each task holds 'mirror_url', 'mirror_creds' and 'image_url'.
        A tag missing downstream is always scheduled. A tag present
        downstream is compared with upstream only outside dry-run mode
        and only when a deep sync is due.
        """
        eight_hours = 28800  # 60 * 60 * 8
        is_deep_sync = self._is_deep_sync(interval=eight_hours)

        summary = self.process_repos_query()

        sync_tasks = defaultdict(list)
        for org, data in summary.items():
            for item in data:
                image = Image(f'{item["server_url"]}/{org}/{item["name"]}')

                mirror_url = item['mirror']['url']

                username = None
                password = None
                mirror_creds = None
                if item['mirror']['pullCredentials'] is not None:
                    pull_credentials = item['mirror']['pullCredentials']
                    raw_data = self.secret_reader.read_all(pull_credentials)
                    username = raw_data["user"]
                    password = raw_data["token"]
                    mirror_creds = f'{username}:{password}'

                image_mirror = Image(mirror_url, username=username,
                                     password=password)

                tags = item['mirror'].get('tags')
                tags_exclude = item['mirror'].get('tagsExclude')

                for tag in image_mirror:
                    if not self.sync_tag(tags=tags, tags_exclude=tags_exclude,
                                         candidate=tag):
                        continue

                    upstream = image_mirror[tag]
                    downstream = image[tag]
                    task = {
                        'mirror_url': str(upstream),
                        'mirror_creds': mirror_creds,
                        'image_url': str(downstream)
                    }

                    if tag not in image:
                        _LOG.debug('Image %s and mirror %s are out off sync',
                                   downstream, upstream)
                        sync_tasks[org].append(task)
                        continue

                    # Deep (slow) check only in non dry-run mode
                    if self.dry_run:
                        _LOG.debug('Image %s and mirror %s are in sync',
                                   downstream, upstream)
                        continue

                    # Deep (slow) check only from time to time
                    if not is_deep_sync:
                        _LOG.debug('Image %s and mirror %s are in sync',
                                   downstream, upstream)
                        continue

                    try:
                        if downstream == upstream:
                            _LOG.debug('Image %s and mirror %s are in sync',
                                       downstream, upstream)
                            continue
                    except ImageComparisonError as details:
                        _LOG.error('[%s]', details)
                        continue

                    _LOG.debug('Image %s and mirror %s are out of sync',
                               downstream, upstream)
                    sync_tasks[org].append(task)

        return sync_tasks

    def _is_deep_sync(self, interval):
        """Return True when at least ``interval`` seconds passed since the
        last recorded deep sync, recording a new timestamp in that case.

        The timestamp lives in a control file in the system temp directory.
        A missing control file, or one whose content is not a number
        (e.g. left empty by an interrupted write), counts as "never synced".
        """
        control_file_name = 'qontract-reconcile-gcr-mirror.timestamp'
        control_file_path = os.path.join(tempfile.gettempdir(),
                                         control_file_name)
        try:
            with open(control_file_path, 'r') as file_obj:
                last_deep_sync = float(file_obj.read())
        except (FileNotFoundError, ValueError):
            # ValueError: unreadable timestamp; rewrite it instead of
            # failing on every subsequent run.
            self._record_timestamp(control_file_path)
            return True

        next_deep_sync = last_deep_sync + interval
        if time.time() >= next_deep_sync:
            self._record_timestamp(control_file_path)
            return True

        return False

    @staticmethod
    def _record_timestamp(path):
        """Overwrite the file at ``path`` with the current ``time.time()``."""
        with open(path, 'w') as file_object:
            file_object.write(str(time.time()))

    def _get_push_creds(self):
        """Return ``{project name: 'user:token'}`` for projects that define
        push credentials; the stored token is base64-decoded first.
        """
        result = self.gqlapi.query(self.GCR_PROJECT_CATALOG_QUERY)

        creds = {}
        for project_data in result['projects']:
            push_secret = project_data['pushCredentials']
            if push_secret is None:
                continue

            raw_data = self.secret_reader.read_all(push_secret)
            project = project_data['name']
            token = base64.b64decode(raw_data["token"]).decode()
            creds[project] = f'{raw_data["user"]}:{token}'

        return creds
class JJB:
    """Wrapper around Jenkins Jobs"""

    # Ordering used by sort_by_type: lower values are written to the
    # per-instance config.yaml first.
    _TYPE_SORT_ORDER = {
        'defaults': 0,
        'global-defaults': 5,
        'views': 10,
        'secrets': 20,
        'base-templates': 30,
        'global-base-templates': 35,
        'job-templates': 40,
        'jobs': 50,
    }

    def __init__(self, configs, ssl_verify=True, settings=None,
                 print_only=False):
        """Materialise the given configs into per-instance working dirs.

        :param configs: list of config dicts (see ``collect_configs``)
        :param ssl_verify: stored as '1'/'0' and exported as
            PYTHONHTTPSVERIFY by ``update``
        :param settings: passed to ``SecretReader``
        :param print_only: when True no secret reader is created and the
            ``JJB_INI`` placeholder is used instead of the instance token
        """
        self.settings = settings
        self.print_only = print_only
        if not print_only:
            self.secret_reader = SecretReader(settings=settings)
        self.collect_configs(configs)
        self.modify_logger()
        self.python_https_verify = str(int(ssl_verify))

    def collect_configs(self, configs):
        """Create one temp working dir per instance holding '<name>.ini'
        and a 'config.yaml' assembled from the (sorted) configs.

        Sets ``self.instances``, ``self.instance_urls`` and
        ``self.working_dirs``. Note that ``configs`` is sorted in place.

        :raises FetchResourceError: when a referenced config resource
            cannot be fetched
        """
        gqlapi = gql.get_api()
        instances = {
            c['instance']['name']: {
                'serverUrl': c['instance']['serverUrl'],
                'token': c['instance']['token'],
                'delete_method': c['instance']['deleteMethod'],
            }
            for c in configs
        }

        working_dirs = {}
        instance_urls = {}
        for name, data in instances.items():
            token = data['token']
            server_url = data['serverUrl']
            wd = tempfile.mkdtemp()
            ini = JJB_INI
            if not self.print_only:
                ini = self.secret_reader.read(token)
                ini = ini.replace('"', '')
                ini = ini.replace('false', 'False')
            ini_file_path = '{}/{}.ini'.format(wd, name)
            with open(ini_file_path, 'w') as f:
                f.write(ini)
                f.write('\n')
            working_dirs[name] = wd
            instance_urls[name] = server_url

        self.sort(configs)

        for c in configs:
            instance_name = c['instance']['name']
            config = c['config']
            config_file_path = \
                '{}/config.yaml'.format(working_dirs[instance_name])
            if config:
                with open(config_file_path, 'a') as f:
                    # NOTE(review): yaml.load with FullLoader is only
                    # appropriate for trusted input - confirm that inline
                    # configs never come from an untrusted source.
                    yaml.dump(yaml.load(config, Loader=yaml.FullLoader), f)
                    f.write('\n')
            else:
                config_path = c['config_path']
                # get config data
                try:
                    config_resource = gqlapi.get_resource(config_path)
                    config = config_resource['content']
                except gql.GqlGetResourceError as e:
                    # keep the original error as the cause for debugging
                    raise FetchResourceError(str(e)) from e
                with open(config_file_path, 'a') as f:
                    f.write(config)
                    f.write('\n')

        self.instances = instances
        self.instance_urls = instance_urls
        self.working_dirs = working_dirs

    def overwrite_configs(self, configs):
        """
        This function will override the existing
        config files in the working directories
        with the supplied configs
        """
        for name, wd in self.working_dirs.items():
            config_path = '{}/config.yaml'.format(wd)
            with open(config_path, 'w') as f:
                f.write(configs[name])

    def sort(self, configs):
        """Sort ``configs`` in place by type, then by name within a type."""
        # list.sort is stable, so sorting by the secondary key first
        # yields an ordering by (type, name)
        configs.sort(key=self.sort_by_name)
        configs.sort(key=self.sort_by_type)

    @staticmethod
    def sort_by_type(config):
        """Return the sort rank of ``config['type']`` (None if unknown)."""
        return JJB._TYPE_SORT_ORDER.get(config['type'])

    @staticmethod
    def sort_by_name(config):
        """Return the name of ``config`` for use as a sort key."""
        return config['name']

    def get_configs(self):
        """
        This function gets the configs from the
        working directories
        """
        configs = {}
        for name, wd in self.working_dirs.items():
            config_path = '{}/config.yaml'.format(wd)
            with open(config_path, 'r') as f:
                configs[name] = f.read()

        return configs

    def generate(self, io_dir, fetch_state):
        """
        Generates job definitions from JJB configs

        :param io_dir: Input/output directory
        :param fetch_state: subdirectory to use ('desired' or 'current')
        """
        for name, wd in self.working_dirs.items():
            ini_path = '{}/{}.ini'.format(wd, name)
            config_path = '{}/config.yaml'.format(wd)

            output_dir = path.join(io_dir, 'jjb', fetch_state, name)
            args = ['--conf', ini_path,
                    'test', config_path,
                    '-o', output_dir,
                    '--config-xml']
            self.execute(args)
            throughput.change_files_ownership(io_dir)

    def print_diffs(self, io_dir, instance_name=None):
        """ Print the diffs between the current and
        the desired job definitions """
        current_path = path.join(io_dir, 'jjb', 'current')
        current_files = self.get_files(current_path, instance_name)
        desired_path = path.join(io_dir, 'jjb', 'desired')
        desired_files = self.get_files(desired_path, instance_name)

        create = self.compare_files(desired_files, current_files)
        delete = self.compare_files(current_files, desired_files)
        common = self.compare_files(desired_files, current_files, in_op=True)

        self.print_diff(create, desired_path, 'create')
        self.print_diff(delete, current_path, 'delete')
        self.print_diff(common, desired_path, 'update')

    def print_diff(self, files, replace_path, action):
        """Log one ``[action, item_type, instance, item]`` entry per file.

        For 'update', files identical to their counterpart are skipped and
        the changed lines are additionally logged at debug level.
        """
        for f in files:
            if action == 'update':
                ft = self.toggle_cd(f)
                equal = filecmp.cmp(f, ft)
                if equal:
                    continue

            instance, item, _ = f.replace(replace_path + '/', '').split('/')
            item_type = et.parse(f).getroot().tag
            item_type = item_type.replace('hudson.model.ListView', 'view')
            item_type = item_type.replace('project', 'job')
            logging.info([action, item_type, instance, item])

            if action == 'update':
                with open(ft) as c, open(f) as d:
                    clines = c.readlines()
                    dlines = d.readlines()

                    differ = difflib.Differ()
                    diff = [ln for ln in differ.compare(clines, dlines)
                            if ln.startswith(('-', '+'))]
                    logging.debug("DIFF:\n" + "".join(diff))

    def compare_files(self, from_files, subtract_files, in_op=False):
        """Return the files of ``from_files`` whose current/desired
        counterpart is present in ``subtract_files`` (``in_op=True``) or
        absent from it (``in_op=False``).
        """
        return [f for f in from_files
                if (self.toggle_cd(f) in subtract_files) is in_op]

    @staticmethod
    def get_files(search_path, instance_name=None):
        """Return all file paths below ``search_path`` (optionally below
        its ``instance_name`` subdirectory)."""
        if instance_name is not None:
            search_path = path.join(search_path, instance_name)
        return [path.join(root, f)
                for root, _, files in os.walk(search_path)
                for f in files]

    @staticmethod
    def toggle_cd(file_name):
        """Swap 'desired' and 'current' in ``file_name``."""
        if 'desired' in file_name:
            return file_name.replace('desired', 'current')
        else:
            return file_name.replace('current', 'desired')

    def update(self):
        """Run ``jenkins-jobs update`` for every instance.

        '--delete-old' is added unless the instance's delete method is
        'manual'.
        """
        for name, wd in self.working_dirs.items():
            ini_path = '{}/{}.ini'.format(wd, name)
            config_path = '{}/config.yaml'.format(wd)

            os.environ['PYTHONHTTPSVERIFY'] = self.python_https_verify
            cmd = ['jenkins-jobs', '--conf', ini_path,
                   'update', config_path]
            delete_method = self.instances[name]['delete_method']
            if delete_method != 'manual':
                cmd.append('--delete-old')
            subprocess.call(cmd)

    @staticmethod
    def get_jjb(args):
        """Build a ``JenkinsJobs`` CLI object for ``args``."""
        # imported lazily, as in the original implementation
        from jenkins_jobs.cli.entry import JenkinsJobs
        return JenkinsJobs(args)

    def execute(self, args):
        """Execute the JJB CLI with ``args`` while logging at ERROR level."""
        jjb = self.get_jjb(args)
        with self.toggle_logger():
            jjb.execute()

    def modify_logger(self):
        """Set the root logger's first handler format and remember the
        current root level for ``toggle_logger``."""
        yaml.warnings({'YAMLLoadWarning': False})
        formatter = logging.Formatter('%(levelname)s: %(message)s')
        logger = logging.getLogger()
        logger.handlers[0].setFormatter(formatter)
        self.default_logging = logger.level

    @contextmanager
    def toggle_logger(self):
        """Context manager: root logger at ERROR inside the block, restored
        to the remembered default level afterwards."""
        logger = logging.getLogger()
        logger.setLevel(logging.ERROR)
        try:
            yield
        finally:
            logger.setLevel(self.default_logging)

    def cleanup(self):
        """Remove all working directories."""
        for wd in self.working_dirs.values():
            shutil.rmtree(wd)

    @retry(exceptions=(JenkinsJobsException))
    def get_jobs(self, wd, name):
        """Return the expanded job definitions of instance ``name``.

        :param wd: working directory of the instance
        :param name: instance name (selects '<name>.ini')
        """
        ini_path = '{}/{}.ini'.format(wd, name)
        config_path = '{}/config.yaml'.format(wd)

        args = ['--conf', ini_path, 'test', config_path]
        jjb = self.get_jjb(args)
        builder = JenkinsManager(jjb.jjb_config)
        registry = ModuleRegistry(jjb.jjb_config, builder.plugins_list)
        parser = YamlParser(jjb.jjb_config)
        parser.load_files(jjb.options.path)
        jobs, _ = parser.expandYaml(registry, jjb.options.names)

        return jobs

    def get_job_webhooks_data(self):
        """Return ``{project_url: [{'job_url', 'trigger'}, ...]}`` for jobs
        with a gitlab trigger whose project url is not on github.com.

        Jobs lacking any of the required keys are skipped.
        """
        job_webhooks_data = {}
        for name, wd in self.working_dirs.items():
            jobs = self.get_jobs(wd, name)

            for job in jobs:
                try:
                    project_url_raw = job['properties'][0]['github']['url']
                    if 'https://github.com' in project_url_raw:
                        continue
                    job_url = \
                        '{}/project/{}'.format(self.instance_urls[name],
                                               job['name'])
                    project_url = \
                        project_url_raw.strip('/').replace('.git', '')
                    gitlab_triggers = job['triggers'][0]['gitlab']
                    mr_trigger = gitlab_triggers['trigger-merge-request']
                    trigger = 'mr' if mr_trigger else 'push'
                    hook = {
                        'job_url': job_url,
                        'trigger': trigger,
                    }
                    job_webhooks_data.setdefault(project_url, [])
                    job_webhooks_data[project_url].append(hook)
                except KeyError:
                    continue

        return job_webhooks_data

    def get_repos(self):
        """Return the set of repo urls of all jobs that define one."""
        repos = set()
        for name, wd in self.working_dirs.items():
            jobs = self.get_jobs(wd, name)
            for job in jobs:
                job_name = job['name']
                try:
                    repos.add(self.get_repo_url(job))
                except KeyError:
                    logging.debug('missing github url: {}'.format(job_name))
        return repos

    def get_admins(self):
        """Return the union of all github-pull-request admin lists."""
        admins = set()
        for name, wd in self.working_dirs.items():
            jobs = self.get_jobs(wd, name)
            for j in jobs:
                try:
                    admins_list = \
                        j['triggers'][0]['github-pull-request']['admin-list']
                    admins.update(admins_list)
                except (KeyError, TypeError):
                    # no admins, that's fine
                    pass

        return admins

    @staticmethod
    def get_repo_url(job):
        """Return the job's github url without surrounding '/' and '.git'.

        :raises KeyError: when the job has no github url property
        """
        repo_url_raw = job['properties'][0]['github']['url']
        return repo_url_raw.strip('/').replace('.git', '')

    def get_all_jobs(self, job_types=None, instance_name=None,
                     include_test=False):
        """Return ``{instance name: [job, ...]}`` filtered by job name.

        :param job_types: substrings of which at least one must occur in
            the job name; None (the default) matches every job
        :param instance_name: restrict the result to this instance
        :param include_test: keep jobs whose name contains 'test'
        """
        if job_types is None:
            # avoid a shared mutable default; '' is contained in any name
            job_types = ['']

        all_jobs = {}
        for name, wd in self.working_dirs.items():
            if instance_name and name != instance_name:
                continue
            logging.debug(f'getting jobs from {name}')
            all_jobs[name] = []
            jobs = self.get_jobs(wd, name)
            for job in jobs:
                job_name = job['name']
                if not any(job_type in job_name for job_type in job_types):
                    continue
                if not include_test and 'test' in job_name:
                    continue
                # temporarily ignore openshift-saas-deploy jobs
                if job_name.startswith('openshift-saas-deploy'):
                    continue
                all_jobs[name].append(job)

        return all_jobs

    def print_jobs(self, job_name=None):
        """Print, as JSON, all jobs whose name contains ``job_name``
        (every job when ``job_name`` is None).

        :raises ValueError: when no job matched
        """
        all_jobs = {}
        found = False
        for name, wd in self.working_dirs.items():
            logging.debug(f'getting jobs from {name}')
            all_jobs[name] = []
            jobs = self.get_jobs(wd, name)
            for job in jobs:
                if job_name is not None and job_name not in job['name']:
                    continue
                all_jobs[name].append(job)
                found = True
        if not found:
            raise ValueError(f"job name {job_name} is not found")
        print(json.dumps(all_jobs, indent=2))
def get_apps_data(date, month_delta=1, thread_pool_size=10):
    """Collect per-app report data and attach it to the app dicts.

    For every app that has code components the following keys are set:
    'post_deploy_jobs', 'promotions', 'merge_activity',
    'container_vulnerabilities', 'deployment_validations', 'service_slo'.
    Apps without code components are returned untouched.

    :param date: end of the reporting window; must support subtraction of
        a ``relativedelta`` and ``replace(tzinfo=...)``
    :param month_delta: length of the reporting window in months
    :param thread_pool_size: passed to ``get_build_history_pool``
    :return: the list returned by ``queries.get_apps()``, enriched in place
    """
    apps = queries.get_apps()
    saas_files = queries.get_saas_files()
    jjb, _ = init_jjb()
    jenkins_map = jenkins_base.get_jenkins_map()
    time_limit = date - relativedelta(months=month_delta)
    timestamp_limit = int(time_limit.replace(tzinfo=timezone.utc).timestamp())

    settings = queries.get_app_interface_settings()
    secret_reader = SecretReader(settings=settings)
    secret_content = secret_reader.read_all({"path": DASHDOTDB_SECRET})
    dashdotdb_url = secret_content["url"]
    dashdotdb_user = secret_content["username"]
    dashdotdb_pass = secret_content["password"]
    auth = (dashdotdb_user, dashdotdb_pass)
    vuln_metrics = requests.get(
        f"{dashdotdb_url}/api/v1/imagemanifestvuln/metrics", auth=auth
    ).text
    validt_metrics = requests.get(
        f"{dashdotdb_url}/api/v1/deploymentvalidation/metrics", auth=auth
    ).text
    slo_metrics = requests.get(
        f"{dashdotdb_url}/api/v1/serviceslometrics/metrics", auth=auth
    ).text
    namespaces = queries.get_namespaces()

    build_jobs = jjb.get_all_jobs(job_types=["build"])
    # Copy the per-instance lists as well: a shallow dict copy would make
    # the appends below leak saas-deploy jobs into build_jobs.
    jobs_to_get = {
        instance: list(jobs) for instance, jobs in build_jobs.items()
    }

    saas_deploy_jobs = []
    for saas_file in saas_files:
        saas_file_name = saas_file["name"]
        for template in saas_file["resourceTemplates"]:
            for target in template["targets"]:
                job = {}
                job["env"] = target["namespace"]["environment"]["name"]
                job["app"] = target["namespace"]["app"]["name"]
                job["cluster"] = target["namespace"]["cluster"]["name"]
                job["namespace"] = target["namespace"]["name"]
                job["name"] = get_openshift_saas_deploy_job_name(
                    saas_file_name, job["env"], settings
                )
                job["saas_file_name"] = saas_file_name
                job["instance"] = saas_file["instance"]["name"]
                saas_deploy_jobs.append(job)
                jobs_to_get.setdefault(job["instance"], []).append(job)

    job_history = get_build_history_pool(
        jenkins_map, jobs_to_get, timestamp_limit, thread_pool_size
    )

    for app in apps:
        if not app["codeComponents"]:
            continue

        app_name = app["name"]

        logging.info(f"collecting post-deploy jobs " f"information for {app_name}")
        post_deploy_jobs = {}
        for saas_file in saas_files:
            if saas_file["app"]["name"] != app_name:
                continue
            resource_types = saas_file["managedResourceTypes"]

            # Only jobs of these types are expected to have a
            # further post-deploy job
            if not any(
                [
                    "Deployment" in resource_types,
                    "DeploymentConfig" in resource_types,
                ]
            ):
                continue

            for resource_template in saas_file["resourceTemplates"]:
                for target in resource_template["targets"]:
                    cluster = target["namespace"]["cluster"]["name"]
                    namespace = target["namespace"]["name"]
                    # setdefault keeps namespaces already recorded for
                    # this cluster by earlier targets
                    post_deploy_jobs.setdefault(cluster, {})
                    post_deploy_jobs[cluster][namespace] = False

        for saas_file in saas_files:
            if saas_file["app"]["name"] != app_name:
                continue
            resource_types = saas_file["managedResourceTypes"]
            if "Job" not in resource_types:
                continue
            for resource_template in saas_file["resourceTemplates"]:
                for target in resource_template["targets"]:
                    cluster = target["namespace"]["cluster"]["name"]
                    namespace = target["namespace"]["name"]

                    # This block skips the check if the cluster/namespace
                    # has no Deployment/DeploymentConfig job associated.
                    if cluster not in post_deploy_jobs:
                        continue
                    if namespace not in post_deploy_jobs[cluster]:
                        continue

                    # Post-deploy job must depend on a openshift-saas-deploy
                    # job
                    if target["upstream"] is None:
                        continue
                    if target["upstream"].startswith("openshift-saas-deploy-"):
                        post_deploy_jobs[cluster][namespace] = True

        app["post_deploy_jobs"] = post_deploy_jobs

        logging.info(f"collecting promotion history for {app_name}")
        app["promotions"] = {}
        for job in saas_deploy_jobs:
            if job["app"] != app_name:
                continue
            if job["name"] not in job_history:
                continue
            history = job_history[job["name"]]
            saas_file_name = job["saas_file_name"]
            app["promotions"].setdefault(saas_file_name, []).append(
                {
                    "env": job["env"],
                    "cluster": job["cluster"],
                    "namespace": job["namespace"],
                    **history,
                }
            )

        logging.info(f"collecting merge activity for {app_name}")
        app["merge_activity"] = {}
        code_repos = [
            c["url"] for c in app["codeComponents"] if c["resource"] == "upstream"
        ]
        for jobs in build_jobs.values():
            for job in jobs:
                try:
                    repo_url = get_repo_url(job)
                except KeyError:
                    continue
                if repo_url not in code_repos:
                    continue
                if job["name"] not in job_history:
                    continue
                history = job_history[job["name"]]
                app["merge_activity"].setdefault(repo_url, []).append(
                    {"branch": job["branch"], **history}
                )

        logging.info(f"collecting dashdotdb information for {app_name}")
        app_namespaces = [
            namespace
            for namespace in namespaces
            if namespace["app"]["name"] == app["name"]
        ]
        vuln_mx = {}
        validt_mx = {}
        slo_mx = {}

        for family in text_string_to_metric_families(vuln_metrics):
            for sample in family.samples:
                if sample.name != "imagemanifestvuln_total":
                    continue
                for app_namespace in app_namespaces:
                    cluster = sample.labels["cluster"]
                    if app_namespace["cluster"]["name"] != cluster:
                        continue
                    namespace = sample.labels["namespace"]
                    if app_namespace["name"] != namespace:
                        continue
                    severity = sample.labels["severity"]
                    by_severity = vuln_mx.setdefault(cluster, {}).setdefault(
                        namespace, {}
                    )
                    # the first sample seen for a severity wins
                    if severity not in by_severity:
                        by_severity[severity] = int(sample.value)

        for family in text_string_to_metric_families(validt_metrics):
            for sample in family.samples:
                if sample.name != "deploymentvalidation_total":
                    continue
                for app_namespace in app_namespaces:
                    cluster = sample.labels["cluster"]
                    if app_namespace["cluster"]["name"] != cluster:
                        continue
                    namespace = sample.labels["namespace"]
                    if app_namespace["name"] != namespace:
                        continue
                    validation = sample.labels["validation"]
                    # dvo: fail == 1, pass == 0, py: true == 1, false == 0
                    # so: ({false|pass}, {true|fail})
                    status = ("Passed", "Failed")[int(sample.labels["status"])]
                    by_status = (
                        validt_mx.setdefault(cluster, {})
                        .setdefault(namespace, {})
                        .setdefault(validation, {})
                    )
                    # the last sample seen for a status wins
                    by_status[status] = int(sample.value)

        for family in text_string_to_metric_families(slo_metrics):
            for sample in family.samples:
                if sample.name != "serviceslometrics":
                    continue
                for app_namespace in app_namespaces:
                    cluster = sample.labels["cluster"]
                    if app_namespace["cluster"]["name"] != cluster:
                        continue
                    namespace = sample.labels["namespace"]
                    if app_namespace["name"] != namespace:
                        continue
                    slo_doc_name = sample.labels["slodoc"]
                    slo_name = sample.labels["name"]
                    by_type = (
                        slo_mx.setdefault(cluster, {})
                        .setdefault(namespace, {})
                        .setdefault(slo_doc_name, {})
                        .setdefault(slo_name, {})
                    )
                    by_type[sample.labels["type"]] = sample.value

        app["container_vulnerabilities"] = vuln_mx
        app["deployment_validations"] = validt_mx
        app["service_slo"] = slo_mx

    return apps
class EcrMirror:
    """Mirror the tags of one upstream image into an AWS ECR repository.

    The constructor resolves the destination repository URI and the ECR
    credentials for the instance's AWS account; ``run`` then copies every
    upstream tag that is missing from the ECR repository using Skopeo.
    """

    def __init__(self, instance, dry_run):
        """Resolve the ECR destination and the credentials for ``instance``.

        Args:
            instance: mapping describing the mirror. The keys read here are
                ``identifier`` (ECR repository name), ``account`` (AWS
                account name), the optional ``region`` and ``mirror`` (with
                ``url`` and ``pullCredentials``).
            dry_run: forwarded to the Skopeo wrapper.
        """
        self.dry_run = dry_run
        self.instance = instance
        self.settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=self.settings)
        self.skopeo_cli = Skopeo(dry_run)
        self.error = False

        identifier = instance["identifier"]
        account = instance["account"]
        region = instance.get("region")

        self.aws_cli = AWSApi(
            thread_pool_size=1,
            accounts=[self._get_aws_account_info(account)],
            settings=self.settings,
            init_ecr_auth_tokens=True,
        )
        self.aws_cli.map_ecr_resources()

        self.ecr_uri = self._get_image_uri(
            account=account,
            repository=identifier,
        )
        if self.ecr_uri is None:
            # Only flag the problem here; run() checks the flag and bails out.
            self.error = True
            LOG.error(f"Could not find the ECR repository {identifier}")

        self.ecr_username, self.ecr_password = self._get_ecr_creds(
            account=account,
            region=region,
        )
        self.ecr_auth = f"{self.ecr_username}:{self.ecr_password}"

        # Pull credentials for the upstream image are optional.
        self.image_username = None
        self.image_password = None
        self.image_auth = None
        pull_secret = self.instance["mirror"]["pullCredentials"]
        if pull_secret is not None:
            raw_data = self.secret_reader.read_all(pull_secret)
            self.image_username = raw_data["user"]
            self.image_password = raw_data["token"]
            self.image_auth = f"{self.image_username}:{self.image_password}"

    def run(self):
        """Copy every upstream tag that is missing from the ECR repository.

        Does nothing when the constructor flagged an error. A failed copy is
        logged and does not stop the remaining tags from being processed.
        """
        if self.error:
            return

        ecr_mirror = Image(
            self.ecr_uri,
            username=self.ecr_username,
            password=self.ecr_password,
        )
        image = Image(
            self.instance["mirror"]["url"],
            username=self.image_username,
            password=self.image_password,
        )

        LOG.debug("[checking %s -> %s]", image, ecr_mirror)
        for tag in image:
            if tag in ecr_mirror:
                continue
            try:
                self.skopeo_cli.copy(
                    src_image=image[tag],
                    src_creds=self.image_auth,
                    dst_image=ecr_mirror[tag],
                    dest_creds=self.ecr_auth,
                )
            except SkopeoCmdError as details:
                LOG.error("[%s]", details)

    def _get_ecr_creds(self, account, region):
        """Return the ``(username, password)`` pair used to push to ECR.

        Args:
            account: AWS account name, used as key into the AWS client data.
            region: AWS region; when ``None`` the account's
                ``resourcesDefaultRegion`` is used.

        Returns:
            A tuple ``("AWS", password)`` where ``password`` is taken from
            the base64-encoded ``user:password`` authorization token.
        """
        if region is None:
            region = self.aws_cli.accounts[account]["resourcesDefaultRegion"]
        auth_token = f"{account}/{region}"
        data = self.aws_cli.auth_tokens[auth_token]
        auth_data = data["authorizationData"][0]
        token = auth_data["authorizationToken"]
        decoded = base64.b64decode(token).decode("utf-8")
        # Split on the first colon only, so that a password which itself
        # contains ":" is not truncated.
        password = decoded.split(":", 1)[1]
        return "AWS", password

    def _get_image_uri(self, account, repository):
        """Return the URI of ECR repository ``repository``, or ``None``."""
        for repo in self.aws_cli.resources[account]["ecr"]:
            if repo["repositoryName"] == repository:
                return repo["repositoryUri"]
        return None

    @staticmethod
    def _get_aws_account_info(account):
        """Return the AWS account entry named ``account``, or ``None``."""
        for account_info in queries.get_aws_accounts():
            if "name" in account_info and account_info["name"] == account:
                return account_info
        return None
def get_apps_data(date, month_delta=1):
    """Collect promotion, merge and dashdotdb data for every app.

    Args:
        date: reference datetime. Build history is collected starting from
            ``date`` minus ``month_delta`` months (interpreted as UTC).
        month_delta: size of the look-back window, in months.

    Returns:
        The list returned by ``queries.get_apps()``. Every app that has
        ``codeComponents`` is enriched in place with the keys
        ``promotions``, ``merge_activity``, ``container_vulnerabilities``
        and ``deployment_validations``; apps without code components are
        returned untouched.
    """
    apps = queries.get_apps()
    jjb, _ = init_jjb()
    saas_jobs = jjb.get_all_jobs(job_types=['saas-deploy', 'promote-to-prod'])
    build_master_jobs = jjb.get_all_jobs(job_types=['build-master'])
    jenkins_map = jenkins_base.get_jenkins_map()
    time_limit = date - relativedelta(months=month_delta)
    timestamp_limit = \
        int(time_limit.replace(tzinfo=timezone.utc).timestamp())
    saas_build_history = \
        get_build_history(jenkins_map, saas_jobs, timestamp_limit)
    build_master_build_history = \
        get_build_history(jenkins_map, build_master_jobs, timestamp_limit)

    settings = queries.get_app_interface_settings()
    secret_reader = SecretReader(settings=settings)
    secret_content = secret_reader.read_all({'path': DASHDOTDB_SECRET})
    dashdotdb_url = secret_content['url']
    dashdotdb_user = secret_content['username']
    dashdotdb_pass = secret_content['password']
    metrics = requests.get(f'{dashdotdb_url}/api/v1/metrics',
                           auth=(dashdotdb_user, dashdotdb_pass)).text
    namespaces = queries.get_namespaces()

    # The metrics payload is identical for every app: parse it once and keep
    # only the two sample families consumed below, instead of re-parsing the
    # whole text for each app.
    samples = [
        sample
        for family in text_string_to_metric_families(metrics)
        for sample in family.samples
        if sample.name in ('imagemanifestvuln_total',
                           'deploymentvalidation_total')
    ]

    for app in apps:
        if not app['codeComponents']:
            continue

        app_name = app['name']

        logging.info(f"collecting promotions for {app_name}")
        app['promotions'] = {}
        saas_repos = [
            c['url'] for c in app['codeComponents']
            if c['resource'] == 'saasrepo'
        ]
        for sr in saas_repos:
            sr_history = saas_build_history.get(sr)
            if not sr_history:
                continue
            successes = [h for h in sr_history if h == 'SUCCESS']
            # (total number of builds, number of successful builds)
            app['promotions'][sr] = (len(sr_history), len(successes))

        logging.info(f"collecting merge activity for {app_name}")
        app['merge_activity'] = {}
        code_repos = [
            c['url'] for c in app['codeComponents']
            if c['resource'] == 'upstream'
        ]
        for cr in code_repos:
            cr_history = build_master_build_history.get(cr)
            if not cr_history:
                continue
            successes = [h for h in cr_history if h == 'SUCCESS']
            app['merge_activity'][cr] = (len(cr_history), len(successes))

        logging.info(f"collecting dashdotdb information for {app_name}")
        app_namespaces = [
            namespace for namespace in namespaces
            if namespace['app']['name'] == app_name
        ]
        vuln_mx = {}
        validt_mx = {}
        for sample in samples:
            for app_namespace in app_namespaces:
                cluster = sample.labels['cluster']
                if app_namespace['cluster']['name'] != cluster:
                    continue
                namespace = sample.labels['namespace']
                if app_namespace['name'] != namespace:
                    continue

                if sample.name == 'imagemanifestvuln_total':
                    severity = sample.labels['severity']
                    by_severity = vuln_mx.setdefault(
                        cluster, {}).setdefault(namespace, {})
                    # The first value seen for a severity wins.
                    if severity not in by_severity:
                        by_severity[severity] = int(sample.value)

                if sample.name == 'deploymentvalidation_total':
                    validation = sample.labels['validation']
                    # dvo: fail == 1, pass == 0, py: true == 1, false == 0
                    # so: ({false|pass}, {true|fail})
                    # Label values are parsed as strings, so the status must
                    # be converted before it can index the tuple.
                    status = ('Passed',
                              'Failed')[int(sample.labels['status'])]
                    by_status = validt_mx.setdefault(
                        cluster, {}).setdefault(
                        namespace, {}).setdefault(validation, {})
                    by_status[status] = int(sample.value)

        app['container_vulnerabilities'] = vuln_mx
        app['deployment_validations'] = validt_mx

    return apps
class QuayMirror:
    """Mirror upstream images into the Quay repositories that declare one.

    ``run`` computes the list of (upstream tag -> Quay tag) copies that are
    needed and executes them with Skopeo.
    """

    # GraphQL query listing the Quay organizations, their push credentials
    # and the instance they live on.
    QUAY_ORG_CATALOG_QUERY = """ { quay_orgs: quay_orgs_v1 { name pushCredentials { path field } instance { name url } } } """  # noqa: E501

    def __init__(self, dry_run=False):
        """Set up the API clients and load the per-organization push creds.

        Args:
            dry_run: forwarded to the Skopeo wrapper; it also disables the
                deep image comparison in ``process_sync_tasks``.
        """
        self.dry_run = dry_run
        self.gqlapi = gql.get_api()
        settings = queries.get_app_interface_settings()
        self.secret_reader = SecretReader(settings=settings)
        self.skopeo_cli = Skopeo(dry_run)
        self.push_creds = self._get_push_creds()

    def run(self):
        """Execute every pending sync task; failed copies are only logged."""
        sync_tasks = self.process_sync_tasks()
        for org, data in sync_tasks.items():
            for item in data:
                try:
                    self.skopeo_cli.copy(src_image=item['mirror_url'],
                                         src_creds=item['mirror_creds'],
                                         dst_image=item['image_url'],
                                         dest_creds=self.push_creds[org])
                except SkopeoCmdError as details:
                    _LOG.error('[%s]', details)

    @staticmethod
    def process_repos_query():
        """Group the Quay repositories that define a mirror by organization.

        Returns:
            A ``defaultdict(list)`` keyed by ``OrgKey(instance, org)`` whose
            values are dicts with the keys ``name``, ``mirror`` and
            ``server_url``.

        Exits the process with ``ExitCodes.ERROR`` when a ``docker.io``
        ``library`` image is configured to be mirrored into a public
        repository.
        """
        apps = queries.get_quay_repos()

        summary = defaultdict(list)
        for app in apps:
            quay_repos = app.get('quayRepos')
            if quay_repos is None:
                continue

            for quay_repo in quay_repos:
                org = quay_repo['org']['name']
                instance = quay_repo['org']['instance']['name']
                server_url = quay_repo['org']['instance']['url']

                for item in quay_repo['items']:
                    if item['mirror'] is None:
                        continue

                    mirror_image = Image(item['mirror']['url'])
                    if (mirror_image.registry == 'docker.io'
                            and mirror_image.repository == 'library'
                            and item['public']):
                        _LOG.error("Image %s can't be mirrored to a public "
                                   "quay repository.", mirror_image)
                        sys.exit(ExitCodes.ERROR)

                    org_key = OrgKey(instance, org)
                    summary[org_key].append({'name': item["name"],
                                             'mirror': item['mirror'],
                                             'server_url': server_url})
        return summary

    @staticmethod
    def sync_tag(tags, tags_exclude, candidate):
        """Tell whether the tag ``candidate`` has to be synced.

        Args:
            tags: optional list of regular expressions; when given, only
                tags matching one of them are synced and ``tags_exclude``
                is ignored.
            tags_exclude: optional list of regular expressions; tags
                matching one of them are not synced.
            candidate: the tag name to check.

        Returns:
            ``True`` when the tag must be synced, ``False`` otherwise.
        """
        if tags is not None:
            # When tags is defined, we don't look at tags_exclude
            return any(re.match(tag, candidate) for tag in tags)

        if tags_exclude is not None:
            return not any(re.match(tag_exclude, candidate)
                           for tag_exclude in tags_exclude)

        # Both tags and tags_exclude are None, so the tag must be synced
        return True

    def process_sync_tasks(self):
        """Work out which tags have to be copied to Quay.

        A tag is scheduled when it is missing from the Quay repository. Tags
        that already exist are additionally compared with their upstream
        counterpart, but only outside dry-run mode and only when a deep sync
        is due (at most once every eight hours).

        Returns:
            A ``defaultdict(list)`` keyed by ``OrgKey`` whose values are
            dicts with the keys ``mirror_url``, ``mirror_creds`` and
            ``image_url``.
        """
        eight_hours = 28800  # 60 * 60 * 8
        is_deep_sync = self._is_deep_sync(interval=eight_hours)

        summary = self.process_repos_query()

        sync_tasks = defaultdict(list)
        for org_key, data in summary.items():
            org = org_key.org_name
            for item in data:
                # Split on the first colon only, so that a token which
                # itself contains ":" is not truncated.
                push_creds = self.push_creds[org_key].split(':', 1)
                image = Image(f'{item["server_url"]}/{org}/{item["name"]}',
                              username=push_creds[0], password=push_creds[1])

                mirror_url = item['mirror']['url']

                username = None
                password = None
                mirror_creds = None
                if item['mirror']['pullCredentials'] is not None:
                    pull_credentials = item['mirror']['pullCredentials']
                    raw_data = self.secret_reader.read_all(pull_credentials)
                    username = raw_data["user"]
                    password = raw_data["token"]
                    mirror_creds = f'{username}:{password}'

                image_mirror = Image(mirror_url, username=username,
                                     password=password)

                tags = item['mirror'].get('tags')
                tags_exclude = item['mirror'].get('tagsExclude')

                for tag in image_mirror:
                    if not self.sync_tag(tags=tags, tags_exclude=tags_exclude,
                                         candidate=tag):
                        continue

                    upstream = image_mirror[tag]
                    downstream = image[tag]
                    if tag not in image:
                        _LOG.debug('Image %s and mirror %s are out of sync',
                                   downstream, upstream)
                        sync_tasks[org_key].append(
                            {'mirror_url': str(upstream),
                             'mirror_creds': mirror_creds,
                             'image_url': str(downstream)})
                        continue

                    # Deep (slow) check only in non dry-run mode, and only
                    # from time to time
                    if self.dry_run or not is_deep_sync:
                        _LOG.debug('Image %s and mirror %s are in sync',
                                   downstream, upstream)
                        continue

                    try:
                        if downstream == upstream:
                            _LOG.debug('Image %s and mirror %s are in sync',
                                       downstream, upstream)
                            continue
                    except ImageComparisonError as details:
                        _LOG.error('[%s]', details)
                        continue

                    _LOG.debug('Image %s and mirror %s are out of sync',
                               downstream, upstream)
                    sync_tasks[org_key].append(
                        {'mirror_url': str(upstream),
                         'mirror_creds': mirror_creds,
                         'image_url': str(downstream)})

        return sync_tasks

    def _is_deep_sync(self, interval):
        """Tell whether a deep sync is due and, if so, record it.

        The time of the last deep sync is kept in a control file inside the
        system temporary directory.

        Args:
            interval: minimum number of seconds between two deep syncs.

        Returns:
            ``True`` when the control file is missing or unreadable, or when
            at least ``interval`` seconds have passed since the recorded
            timestamp; in those cases the current time is recorded.
            ``False`` otherwise.
        """
        control_file_name = 'qontract-reconcile-quay-mirror.timestamp'
        control_file_path = os.path.join(tempfile.gettempdir(),
                                         control_file_name)
        try:
            with open(control_file_path, 'r') as file_obj:
                last_deep_sync = float(file_obj.read())
        except (FileNotFoundError, ValueError):
            # An empty or corrupt control file is handled like a missing
            # one; otherwise every later run would fail on the same file.
            self._record_timestamp(control_file_path)
            return True

        next_deep_sync = last_deep_sync + interval
        if time.time() >= next_deep_sync:
            self._record_timestamp(control_file_path)
            return True

        return False

    @staticmethod
    def _record_timestamp(path):
        """Overwrite the file at ``path`` with the current Unix timestamp."""
        with open(path, 'w') as file_object:
            file_object.write(str(time.time()))

    def _get_push_creds(self):
        """Return the push credentials of every Quay org that defines them.

        Returns:
            A dict mapping ``OrgKey(instance, org)`` to a ``user:token``
            string. Organizations without ``pushCredentials`` are skipped.
        """
        result = self.gqlapi.query(self.QUAY_ORG_CATALOG_QUERY)

        creds = {}
        for org_data in result['quay_orgs']:
            push_secret = org_data['pushCredentials']
            if push_secret is None:
                continue

            raw_data = self.secret_reader.read_all(push_secret)
            org = org_data['name']
            instance = org_data['instance']['name']
            org_key = OrgKey(instance, org)
            creds[org_key] = f'{raw_data["user"]}:{raw_data["token"]}'

        return creds
def _get_automationtoken(self, tokenpath):
    """Read the secret referenced by ``tokenpath`` and return its value.

    A ``SecretReader`` configured with ``self.settings`` is created for the
    lookup.
    """
    return SecretReader(settings=self.settings).read(tokenpath)
def __init__(self, token, settings=None):
    """Configure the ``pypd`` API key from a secret and load the users.

    Args:
        token: secret reference handed to ``SecretReader.read``; the value
            read is installed as ``pypd.api_key``.
        settings: optional settings passed to the ``SecretReader``.
    """
    pypd.api_key = SecretReader(settings=settings).read(token)
    self.init_users()
def get_tf_secrets(
    account: Account, secret_reader: SecretReader
) -> Tuple[str, Dict]:
    """Return the account name paired with its automation token secret.

    Args:
        account: account entry; its ``name`` and ``automationToken`` items
            are read.
        secret_reader: reader whose ``read_all`` resolves the automation
            token reference.

    Returns:
        A ``(account name, secret)`` tuple, where ``secret`` is whatever
        ``secret_reader.read_all`` returns for the automation token.
    """
    return account["name"], secret_reader.read_all(account["automationToken"])