def update_role_data(dynamo_table, account_number, role, current_policy, source='Scan', add_no_repo=True):
    """
    Bring the stored record for a role in line with the policy that was just observed.

    - A role with no stored record gets its initial record created from the supplied policy.
    - For a role that is already stored, the newest stored policy version is compared with
      `current_policy`. If they differ, a new policy version is recorded and the permissions
      that were added are collected.
    - The no-repo permission list is refreshed (only when `add_no_repo` is set), the opt-out
      is refreshed, the 'Refreshed' timestamp is written and `role.policies` is reloaded
      from Dynamo.

    Args:
        dynamo_table
        account_number
        role (Role): current role being updated
        current_policy (dict): representation of the current policy version
        source: Default 'Scan' but could be Repo, Rollback, etc
        add_no_repo (bool): when True, newly added permissions are passed on to the
            no-repo permission list

    Returns:
        None
    """
    stored_role = get_role_data(dynamo_table, role.role_id, fields=['OptOut', 'Policies'])

    # Role has never been seen before: store its first version and stop here
    if not stored_role:
        initial_data = store_initial_role_data(
            dynamo_table, role.arn, role.create_date, role.role_id, role.role_name,
            account_number, current_policy)
        role.set_attributes(initial_data)
        LOGGER.info('Added new role ({}): {}'.format(role.role_id, role.arn))
        return

    # Compare against the most recent policy version we have on record
    previous_policy = stored_role['Policies'][-1]['Policy']
    added_permissions = set()
    if current_policy != previous_policy:
        add_new_policy_version(dynamo_table, role, current_policy, source)
        LOGGER.info(
            '{} has different inline policies than last time, adding to role store'
            .format(role.arn))
        added_permissions = find_newly_added_permissions(previous_policy, current_policy)

    if add_no_repo:
        update_no_repo_permissions(dynamo_table, role, added_permissions)
    update_opt_out(dynamo_table, role)

    refreshed_at = datetime.datetime.utcnow().isoformat()
    set_role_data(dynamo_table, role.role_id, {'Refreshed': refreshed_at})

    # Reload the full policy history so the role object reflects what is stored
    reloaded = get_role_data(dynamo_table, role.role_id, fields=['Policies'])
    role.policies = reloaded['Policies']
def schedule_repo(account_number, dynamo_table, config, hooks):
    """
    Schedule a repo for every eligible role in an account.

    A role is eligible when it has repoable permissions and no repo is already scheduled
    for it. The scheduled time is now plus `repo_schedule_period_days` from config
    (default 7). The role's currently repoable services are stored alongside the
    scheduled time, and the AFTER_SCHEDULE_REPO hooks are called with the scheduled roles.
    """
    account_role_ids = role_ids_for_account(dynamo_table, account_number)
    roles = Roles([Role(get_role_data(dynamo_table, roleID)) for roleID in tqdm(account_role_ids)])

    schedule_days = config.get('repo_schedule_period_days', 7)
    scheduled_time = int(time.time()) + (86400 * schedule_days)

    scheduled_roles = []
    for role in roles:
        if not role.repoable_permissions > 0 or role.repo_scheduled:
            continue

        role.repo_scheduled = scheduled_time
        # freeze the scheduled perms to whatever is repoable right now
        frozen = {'RepoScheduled': scheduled_time, 'ScheduledPerms': role.repoable_services}
        set_role_data(dynamo_table, role.role_id, frozen)
        scheduled_roles.append(role)

    scheduled_names = ', '.join([r.role_name for r in scheduled_roles])
    LOGGER.info("Scheduled repo for {} days from now for account {} and these roles:\n\t{}".format(
        schedule_days, account_number, scheduled_names))

    repokid.hooks.call_hooks(hooks, 'AFTER_SCHEDULE_REPO', {'roles': scheduled_roles})
def remove_opt_out(dynamo_table, message):
    """
    Cancel the opt-out of the role named in `message`.

    Args:
        dynamo_table
        message: object providing `account` and `role_name`

    Returns:
        ResponderReturn: unsuccessful when the role cannot be found or is not currently
        opted out; successful once the stored OptOut has been reset to an empty dict.
    """
    account = message.account
    role_name = message.role_name

    role_id = dynamo.find_role_in_cache(dynamo_table, account, role_name)
    if not role_id:
        not_found = "Unable to find role {} in account {}".format(role_name, account)
        return ResponderReturn(successful=False, return_message=not_found)

    role_data = dynamo.get_role_data(dynamo_table, role_id, fields=["OptOut"])
    currently_opted_out = "OptOut" in role_data and role_data["OptOut"]
    if not currently_opted_out:
        not_opted_out = "Role {} in account {} wasn't opted out".format(role_name, account)
        return ResponderReturn(successful=False, return_message=not_opted_out)

    # an empty dict is how "no opt-out" is stored
    dynamo.set_role_data(dynamo_table, role_id, {"OptOut": {}})
    cancelled = "Cancelled opt-out for role {} in account {}".format(role_name, account)
    return ResponderReturn(successful=True, return_message=cancelled)
def update_no_repo_permissions(dynamo_table, role, newly_added_permissions):
    """
    Refresh the no-repo permission list of a role, both in Dynamo and on the role object.

    Stored entries whose expiry time is still in the future are kept; expired ones are
    dropped. Every newly added permission is (re)entered with an expiry of now plus
    CONFIG['repo_requirements']['exclude_new_permissions_for_days'] days (default 14).

    Args:
        dynamo_table
        role
        newly_added_permissions (set)

    Returns:
        None
    """
    stored = get_role_data(dynamo_table, role.role_id, fields=['NoRepoPermissions'])
    previously_ignored = stored.get('NoRepoPermissions', {})

    now = int(time.time())
    exclusion_days = CONFIG['repo_requirements'].get('exclude_new_permissions_for_days', 14)
    expiry_for_new = now + (24 * 60 * 60 * exclusion_days)

    # carry over only the entries that have not expired yet
    refreshed = {
        permission: expire_time
        for permission, expire_time in previously_ignored.items()
        if expire_time > now
    }
    # new permissions always get a fresh expiry, even if they were already listed
    refreshed.update(dict.fromkeys(newly_added_permissions, expiry_for_new))

    role.no_repo_permissions = refreshed
    set_role_data(dynamo_table, role.role_id, {'NoRepoPermissions': role.no_repo_permissions})
def cancel_scheduled_repo(account_number, dynamo_table, role_name=None, is_all=None):
    """
    Cancel a scheduled repo for one role, or for every scheduled role, in an account.

    Args:
        account_number (string)
        dynamo_table
        role_name (string): name of the single role to unschedule; ignored when is_all is set
        is_all (bool): when truthy, unschedule every role in the account that has a repo
            scheduled

    Returns:
        None
    """
    if not is_all and not role_name:
        LOGGER.error('Either a specific role to cancel or all must be provided')
        return

    if is_all:
        roles = Roles([Role(get_role_data(dynamo_table, roleID))
                       for roleID in role_ids_for_account(dynamo_table, account_number)])

        # filter to show only roles that are scheduled
        roles = [role for role in roles if role.repo_scheduled]

        for role in roles:
            set_role_data(dynamo_table, role.role_id, {'RepoScheduled': 0, 'ScheduledPerms': []})

        LOGGER.info('Canceled scheduled repo for roles: {}'.format(
            ', '.join([role.role_name for role in roles])))
        return

    role_id = find_role_in_cache(dynamo_table, account_number, role_name)
    if not role_id:
        # Logger.warn is a deprecated alias that newer Python versions no longer provide
        LOGGER.warning('Could not find role with name {} in account {}'.format(role_name, account_number))
        return

    role = Role(get_role_data(dynamo_table, role_id))

    if not role.repo_scheduled:
        LOGGER.warning('Repo was not scheduled for role {} in account {}'.format(role.role_name, account_number))
        return

    set_role_data(dynamo_table, role.role_id, {'RepoScheduled': 0, 'ScheduledPerms': []})
    LOGGER.info('Successfully cancelled scheduled repo for role {} in account {}'.format(
        role.role_name, role.account))
def _update_role_data(role, dynamo_table, account_number, config, conn, hooks, source, add_no_repo=True):
    """
    Run a reduced role update, used to get an accurate count of repoable permissions
    after a rollback or repo.

    Refreshed here:
      - Policies
      - Aardvark data
      - Total permissions
      - Repoable permissions
      - Repoable services
      - Stats

    Left alone:
      - Filters
      - Active/inactive roles

    Args:
        role (Role)
        dynamo_table
        account_number
        config
        conn (dict)
        hooks
        source: repo, rollback, etc
        add_no_repo: if set to True newly discovered permissions will be added to no repo list

    Returns:
        None
    """
    inline_policies = get_role_inline_policies(role.as_dict(), **conn) or {}
    roledata.update_role_data(
        dynamo_table, account_number, role, inline_policies,
        source=source, add_no_repo=add_no_repo)

    aardvark_data = _get_aardvark_data(config['aardvark_api_location'], arn=role.arn)
    # without Aardvark data there is nothing further to recalculate
    if not aardvark_data:
        return

    role.aa_data = aardvark_data[role.arn]
    minimum_age = config['filter_config']['AgeFilter']['minimum_age']
    roledata._calculate_repo_scores([role], minimum_age, hooks)

    recalculated = {
        'AAData': role.aa_data,
        'TotalPermissions': role.total_permissions,
        'RepoablePermissions': role.repoable_permissions,
        'RepoableServices': role.repoable_services,
    }
    set_role_data(dynamo_table, role.role_id, recalculated)
    roledata.update_stats(dynamo_table, [role], source=source)
def opt_out(dynamo_table, message):
    """
    Opt the role named in `message` out for a limited period.

    The period is CONFIG["opt_out_period_days"] (default 90 days, also used when CONFIG
    is empty). The stored OptOut entry has the form
    {"owner": ..., "reason": ..., "expire": <epoch seconds>}.

    Args:
        dynamo_table
        message: object providing `account`, `role_name`, `reason` and `requestor`

    Returns:
        ResponderReturn: unsuccessful when reason/requestor is missing, the role cannot
        be found, or the role is already opted out; successful once the opt-out is stored.
    """
    opt_out_period = CONFIG.get("opt_out_period_days", 90) if CONFIG else 90

    if not message.reason or not message.requestor:
        return ResponderReturn(
            successful=False,
            return_message="Reason and requestor must be specified")

    role_id = dynamo.find_role_in_cache(dynamo_table, message.account, message.role_name)
    if not role_id:
        return ResponderReturn(
            successful=False,
            return_message="Unable to find role {} in account {}".format(
                message.role_name, message.account),
        )

    role_data = dynamo.get_role_data(dynamo_table, role_id, fields=["OptOut"])
    if "OptOut" in role_data and role_data["OptOut"]:
        timestr = time.strftime("%m/%d/%y", time.localtime(role_data["OptOut"]["expire"]))
        return ResponderReturn(
            successful=False,
            return_message=(
                "Role {} in account {} is already opted out by {} for reason {} "
                "until {}".format(
                    message.role_name,
                    message.account,
                    role_data["OptOut"]["owner"],
                    role_data["OptOut"]["reason"],
                    timestr,
                )),
        )

    # Compute the expiry directly in epoch seconds. Subtracting datetime(1970, 1, 1) from a
    # naive local-time datetime skews the stored epoch by the host's UTC offset.
    period_seconds = datetime.timedelta(days=opt_out_period).total_seconds()
    expire_epoch = int(time.time() + period_seconds)
    # Local-time rendering, matching how the "already opted out" message formats the
    # stored value with time.localtime().
    expire_dt = datetime.datetime.fromtimestamp(expire_epoch)

    new_opt_out = {
        "owner": message.requestor,
        "reason": message.reason,
        "expire": expire_epoch,
    }
    dynamo.set_role_data(dynamo_table, role_id, {"OptOut": new_opt_out})
    return ResponderReturn(
        successful=True,
        return_message="Role {} in account {} opted-out until {}".format(
            message.role_name, message.account, expire_dt.strftime("%m/%d/%y")),
    )
def _schedule_repo(account_number, dynamo_table, config, hooks):
    """
    Schedule a repo for every eligible role in an account.

    A role is skipped (with a log line) when it has no Access Advisor data, has no
    repoable permissions, or already has a repo scheduled. Every other role is scheduled
    for now plus `repo_schedule_period_days` from config (default 7), with its currently
    repoable services stored as the scheduled permissions. The AFTER_SCHEDULE_REPO hooks
    are then called with the scheduled roles.
    """
    account_role_ids = role_ids_for_account(dynamo_table, account_number)
    roles = Roles([
        Role.parse_obj(get_role_data(dynamo_table, roleID))
        for roleID in tqdm(account_role_ids)
    ])

    schedule_days = config.get("repo_schedule_period_days", 7)
    scheduled_time = int(time.time()) + (86400 * schedule_days)

    scheduled_roles = []
    for role in roles:
        if not role.aa_data:
            LOGGER.warning("Not scheduling %s; missing Access Advisor data", role.arn)
        elif not role.repoable_permissions > 0:
            LOGGER.debug("Not scheduling %s; no repoable permissions", role.arn)
        elif role.repo_scheduled:
            LOGGER.debug(
                "Not scheduling %s; already scheduled for %s",
                role.arn,
                role.repo_scheduled,
            )
        else:
            role.repo_scheduled = scheduled_time
            # freeze the scheduled perms to whatever is repoable right now
            frozen = {
                "RepoScheduled": scheduled_time,
                "ScheduledPerms": role.repoable_services,
            }
            set_role_data(dynamo_table, role.role_id, frozen)
            scheduled_roles.append(role)

    scheduled_names = ", ".join([r.role_name for r in scheduled_roles])
    LOGGER.info(
        "Scheduled repo for {} days from now for account {} and these roles:\n\t{}"
        .format(schedule_days, account_number, scheduled_names))

    repokid.hooks.call_hooks(hooks, "AFTER_SCHEDULE_REPO", {"roles": scheduled_roles})
def update_opt_out(dynamo_table, role):
    """
    Clear the stored opt-out of a role once it has expired.

    Opt-out objects should have the form {'expire': xxx, 'owner': xxx, 'reason': xxx}.
    When the role has an opt-out whose 'expire' is earlier than the current time, the
    stored OptOut is replaced with an empty dict. Otherwise nothing is written.

    Args:
        dynamo_table
        role

    Returns:
        None
    """
    # no opt-out on the role: nothing to expire
    if not role.opt_out:
        return

    # still valid: leave it in place
    if int(role.opt_out['expire']) >= int(time.time()):
        return

    set_role_data(dynamo_table, role.role_id, {'OptOut': {}})
def opt_out(dynamo_table, message):
    """
    Opt the role named in `message` out for a limited period.

    The period is CONFIG['opt_out_period_days'] (default 90 days, also used when CONFIG
    is empty). The stored OptOut entry has the form
    {'owner': ..., 'reason': ..., 'expire': <epoch seconds>}.

    Args:
        dynamo_table
        message: object providing `account`, `role_name`, `reason` and `requestor`

    Returns:
        ResponderReturn: unsuccessful when reason/requestor is missing, the role cannot
        be found, or the role is already opted out; successful once the opt-out is stored.
    """
    opt_out_period = CONFIG.get('opt_out_period_days', 90) if CONFIG else 90

    if not message.reason or not message.requestor:
        return ResponderReturn(
            successful=False,
            return_message='Reason and requestor must be specified')

    role_id = dynamo.find_role_in_cache(dynamo_table, message.account, message.role_name)
    if not role_id:
        return ResponderReturn(
            successful=False,
            return_message='Unable to find role {} in account {}'.format(
                message.role_name, message.account))

    role_data = dynamo.get_role_data(dynamo_table, role_id, fields=['OptOut'])
    if 'OptOut' in role_data and role_data['OptOut']:
        timestr = time.strftime('%m/%d/%y', time.localtime(role_data['OptOut']['expire']))
        return ResponderReturn(
            successful=False,
            return_message=(
                'Role {} in account {} is already opted out by {} for reason {} '
                'until {}'.format(message.role_name, message.account,
                                  role_data['OptOut']['owner'],
                                  role_data['OptOut']['reason'], timestr)))

    # Compute the expiry directly in epoch seconds. Subtracting datetime(1970, 1, 1) from a
    # naive local-time datetime skews the stored epoch by the host's UTC offset.
    period_seconds = datetime.timedelta(days=opt_out_period).total_seconds()
    expire_epoch = int(time.time() + period_seconds)
    # Local-time rendering, matching how the "already opted out" message formats the
    # stored value with time.localtime().
    expire_dt = datetime.datetime.fromtimestamp(expire_epoch)

    new_opt_out = {
        'owner': message.requestor,
        'reason': message.reason,
        'expire': expire_epoch
    }
    dynamo.set_role_data(dynamo_table, role_id, {'OptOut': new_opt_out})
    return ResponderReturn(
        successful=True,
        return_message='Role {} in account {} opted-out until {}'.format(
            message.role_name, message.account, expire_dt.strftime('%m/%d/%y')))
def cancel_scheduled_repo(account_number, role_name, dynamo_table):
    """
    Cancel scheduled repo for a role in an account.

    Looks the role up by name, and if it has a repo scheduled resets its stored
    RepoScheduled value to 0. A warning is logged (and nothing is written) when the role
    cannot be found or has no repo scheduled.

    Args:
        account_number (string)
        role_name (string)
        dynamo_table

    Returns:
        None
    """
    role_id = find_role_in_cache(dynamo_table, account_number, role_name)
    if not role_id:
        # Logger.warn is a deprecated alias that newer Python versions no longer provide
        LOGGER.warning('Could not find role with name {} in account {}'.format(role_name, account_number))
        return

    role = Role(get_role_data(dynamo_table, role_id))

    if not role.repo_scheduled:
        LOGGER.warning('Repo was not scheduled for role {} in account {}'.format(role.role_name, account_number))
        return

    set_role_data(dynamo_table, role.role_id, {'RepoScheduled': 0})
    LOGGER.info('Successfully cancelled scheduled repo for role {} in account {}'.format(
        role.role_name, role.account))
def find_and_mark_inactive(dynamo_table, account_number, active_roles):
    """
    Flag roles that are known for the account but were not seen in the latest scan.

    The inactive set is every role ID stored for the account minus the active ones.
    Each of those that is still stored as Active gets Active set to False.

    Args:
        dynamo_table
        account_number (string)
        active_roles (set): the currently active roles discovered in the most recent scan

    Returns:
        None
    """
    currently_active = set(active_roles)
    previously_known = set(role_ids_for_account(dynamo_table, account_number))

    for roleID in previously_known - currently_active:
        stored = get_role_data(dynamo_table, roleID, fields=['Active', 'Arn'])
        # only write when the stored flag actually needs to change
        if not stored.get('Active'):
            continue
        set_role_data(dynamo_table, roleID, {'Active': False})
def store(self, fields: Optional[List[str]] = None) -> None:
    """
    Persist this role to DynamoDB, creating the entry if it does not exist yet.

    Before writing, the stored LastUpdated value is fetched and compared with the local
    one so that a newer stored copy is not overwritten.

    Args:
        fields: when given, only these fields (plus last_updated) are written; otherwise
            the whole model is written.

    Raises:
        IntegrityError: the stored role has a later last_updated than this instance.
    """
    create = False
    try:
        remote_role_data = Role(role_id=self.role_id, arn=self.arn)
        remote_role_data.fetch(fields=["LastUpdated"])
        if (remote_role_data.last_updated and self.last_updated
                and remote_role_data.last_updated > self.last_updated):
            raise IntegrityError(
                "stored role has been updated since last fetch")
    except RoleNotFoundError:
        # nothing stored yet, so this is a create rather than an update
        create = True

    self.last_updated = datetime.datetime.now()

    if create:
        # TODO: handle this case in set_role_data() to simplify logic here
        create_dynamodb_entry(
            self.dict(by_alias=True,
                      exclude={"config", "_dirty", "_updated_fields"}))
        self._updated_fields = set()
    elif fields:
        include_fields = set(fields)
        # last_updated must always be written so the next integrity check is accurate
        include_fields.add("last_updated")
        set_role_data(
            self.role_id,
            self.dict(
                include=include_fields,
                by_alias=True,
                exclude=self._default_exclude,
            ),
        )
        # The stored fields are no longer pending. The difference has to be assigned
        # back; a bare `a - b` expression leaves the set untouched.
        self._updated_fields = self._updated_fields - set(fields)
    else:
        set_role_data(
            self.role_id,
            self.dict(by_alias=True, exclude=self._default_exclude),
        )
        self._updated_fields = set()

    # the model is only clean once every updated field has been stored
    self._dirty = bool(self._updated_fields)
def remove_opt_out(dynamo_table, message):
    """
    Cancel the opt-out of the role named in `message`.

    Args:
        dynamo_table
        message: object providing `account` and `role_name`

    Returns:
        ResponderReturn: unsuccessful when the role cannot be found or is not currently
        opted out; successful once the stored OptOut has been reset to an empty dict.
    """
    account = message.account
    role_name = message.role_name

    role_id = dynamo.find_role_in_cache(dynamo_table, account, role_name)
    if not role_id:
        not_found = 'Unable to find role {} in account {}'.format(role_name, account)
        return ResponderReturn(successful=False, return_message=not_found)

    role_data = dynamo.get_role_data(dynamo_table, role_id, fields=['OptOut'])
    currently_opted_out = 'OptOut' in role_data and role_data['OptOut']
    if not currently_opted_out:
        not_opted_out = 'Role {} in account {} wasn\'t opted out'.format(role_name, account)
        return ResponderReturn(successful=False, return_message=not_opted_out)

    # an empty dict is how "no opt-out" is stored
    dynamo.set_role_data(dynamo_table, role_id, {'OptOut': {}})
    cancelled = 'Cancelled opt-out for role {} in account {}'.format(role_name, account)
    return ResponderReturn(successful=True, return_message=cancelled)
def schedule_repo(account_number, dynamo_table, config):
    """
    Schedule a repo for every role in an account that has repoable permissions.

    The scheduled time is now plus `repo_schedule_period_days` from config (default 7)
    and is stored as RepoScheduled on each such role. The names of the scheduled roles
    are logged at the end.
    """
    account_role_ids = role_ids_for_account(dynamo_table, account_number)
    roles = Roles([
        Role(get_role_data(dynamo_table, roleID))
        for roleID in tqdm(account_role_ids)
    ])

    schedule_days = config.get('repo_schedule_period_days', 7)
    scheduled_time = int(time.time()) + (86400 * schedule_days)

    scheduled_names = []
    for role in roles:
        if not role.repoable_permissions > 0:
            continue
        set_role_data(dynamo_table, role.role_id, {'RepoScheduled': scheduled_time})
        scheduled_names.append(role.role_name)

    LOGGER.info(
        "Scheduled repo for {} days from now for these roles:\n\t{}".format(
            schedule_days, ', '.join(scheduled_names)))
def _deal_with_policies(role, account_number, config, hooks, scheduled, role_name, dynamo_table, commit, continuing):
    """
    Work out the repoed policies for a role and, when `commit` is set, apply them.

    Steps:
      1) compute the repoable permissions (restricted to the previously scheduled ones
         when `scheduled` is set) and the resulting repoed policies
      2) stop and unschedule the role if the repoed policies would exceed the size limit
         or `continuing` was already False
      3) without `commit`, only log what would be deleted/replaced
      4) with `commit`, delete and replace policies, store the new policy version,
         unschedule the role, call the AFTER_REPO hooks and, if no errors were collected,
         record the repo and refresh the role data

    Returns:
        list of error strings when the commit path runs to the end; None on the early
        exits (not continuing, or not committing)
    """
    errors = []

    _, eligible_permissions = roledata._get_role_permissions(role)
    repoable_permissions = roledata._get_repoable_permissions(
        account_number,
        role.role_name,
        eligible_permissions,
        role.aa_data,
        role.no_repo_permissions,
        config["filter_config"]["AgeFilter"]["minimum_age"],
        hooks,
    )

    # if this is a scheduled repo we need to filter out permissions that weren't previously scheduled
    if scheduled:
        repoable_permissions = roledata._filter_scheduled_repoable_perms(
            repoable_permissions, role.scheduled_perms)

    latest_policy = role.policies[-1]["Policy"]
    repoed_policies, deleted_policy_names = roledata._get_repoed_policy(
        latest_policy, repoable_permissions)

    if inline_policies_size_exceeds_maximum(repoed_policies):
        size_error = (
            "Policies would exceed the AWS size limit after repo for role: {} in account {}.  "
            "Please manually minify.".format(role_name, account_number))
        LOGGER.error(size_error)
        errors.append(size_error)
        continuing = False

    # if we aren't repoing for some reason, unschedule the role
    if not continuing:
        set_role_data(
            dynamo_table, role.role_id,
            {"RepoScheduled": 0, "ScheduledPerms": []})
        return

    # dry run: report what would happen and leave everything untouched
    if not commit:
        log_deleted_and_repoed_policies(
            deleted_policy_names, repoed_policies, role_name, account_number)
        return

    conn = config["connection_iam"]
    conn["account_number"] = account_number

    for policy_name in deleted_policy_names:
        delete_error = delete_policy(policy_name, role, account_number, conn)
        if delete_error:
            LOGGER.error(delete_error)
            errors.append(delete_error)

    if repoed_policies:
        replace_error = replace_policies(repoed_policies, role, account_number, conn)
        if replace_error:
            LOGGER.error(replace_error)
            errors.append(replace_error)

    current_policies = get_role_inline_policies(role.as_dict(), **conn) or {}
    roledata.add_new_policy_version(dynamo_table, role, current_policies, "Repo")

    # regardless of whether we're successful we want to unschedule the repo
    set_role_data(
        dynamo_table, role.role_id,
        {"RepoScheduled": 0, "ScheduledPerms": []})

    repokid.hooks.call_hooks(hooks, "AFTER_REPO", {"role": role, "errors": errors})

    if errors:
        return errors

    repoed_at = datetime.datetime.utcnow().isoformat()
    set_role_data(dynamo_table, role.role_id, {"Repoed": repoed_at})
    update_repoed_description(role.role_name, **conn)
    partial_update_role_data(
        role,
        dynamo_table,
        account_number,
        config,
        conn,
        hooks,
        source="Repo",
        add_no_repo=False,
    )
    LOGGER.info("Successfully repoed role: {} in account {}".format(
        role.role_name, account_number))
    return errors
def store(self, fields: Optional[List[str]] = None) -> None:
    """
    Persist this role to DynamoDB, creating the item if it does not exist yet.

    Before writing, the stored LastUpdated value is fetched and compared with the local
    one so that a newer stored copy is not overwritten. The write is attempted up to
    three times; when the item is too large the oldest policy version is removed before
    the next attempt.

    Args:
        fields: when given, only these fields (plus last_updated) are written; otherwise
            the whole model is written.

    Raises:
        IntegrityError: the stored role has a later last_updated than this instance.
        RoleStoreError: every write attempt failed.
    """
    create = False
    try:
        remote_role_data = Role(role_id=self.role_id, arn=self.arn)
        remote_role_data.fetch(fields=["LastUpdated"])
        if (remote_role_data.last_updated and self.last_updated
                and remote_role_data.last_updated > self.last_updated):
            # Fetch the rest of the role data for debugging
            remote_role_data.fetch()
            logger.warning(
                "role has been updated since last fetch: stored %s, local %s",
                remote_role_data.last_updated,
                self.last_updated,
                extra={
                    "stored_role": remote_role_data.dict(),
                    "local_role": self.dict(),
                },
            )
            raise IntegrityError(
                "stored role has been updated since last fetch")
    except RoleNotFoundError:
        # nothing stored yet, so this is a create rather than an update
        create = True

    self.last_updated = datetime.datetime.now()

    set_role_data_args: Dict[str, Any] = {
        "by_alias": True,
    }

    # If fields are specified, we need to add last_updated to make sure it gets set
    if fields:
        include_fields = set(fields)
        include_fields.add("last_updated")
        set_role_data_args["include"] = include_fields

    # Exclude key fields unless this is a newly-created item. Key fields cannot be included
    # in DynamoDB update calls.
    # Work on a copy: updating self._meta in place would leave the key fields in it for
    # every later call, including a later create that needs them written.
    exclude_fields = set(self._meta)
    if not create:
        exclude_fields.update(self._keys)
    set_role_data_args["exclude"] = exclude_fields

    max_retries = 3
    for attempts in range(max_retries):
        try:
            # the dict is rebuilt on every attempt because a retry may have trimmed policies
            set_role_data(
                self.role_id,
                self.dict(**set_role_data_args),
                create=create,
            )
        except DynamoDBMaxItemSizeError:
            logger.info(
                "role %s too big for DynamoDB, removing oldest policy version",
                self.role_name,
            )
            self._remove_oldest_policy_version()
        except DynamoDBError:
            logger.info(
                "failed attempt %d to store role %s in DynamoDB",
                attempts,
                self.role_name,
                exc_info=True,
            )
        else:
            self._updated_fields = (self._updated_fields - set(fields) if fields else set())
            # model is still dirty if we haven't stored all updated fields
            self._dirty = len(self._updated_fields) > 0
            return

    # If we've made it this far, the role was not stored
    raise RoleStoreError(f"failed to store {self.arn} in DynamoDB")
def update_role_cache(account_number, dynamo_table, config, hooks):
    """
    Update data about all roles in a given account:
      1) list all the roles and initiate a role object with basic data including name and roleID
      2) get inline policies for each of the roles
      3) build a list of active roles - we'll want to keep data about roles that may have been deleted in case we
         need to restore them, so if we used to have a role and now we don't see it we'll mark it inactive
      4) update data about the roles in Dynamo
      5) mark inactive roles in Dynamo
      6) load and instantiate filter plugins
      7) for each filter determine the list of roles that it filters
      8) update data in Dynamo about filters
      9) get Aardvark data for each role
      10) update Dynamo with Aardvark data
      11) calculate repoable permissions/policies for all the roles
      12) update Dynamo with information about how many total and repoable permissions and which services are repoable
      13) update stats in Dynamo with basic information like total permissions and which filters are applicable

    Args:
        account_number (string): The current account number Repokid is being run against
        dynamo_table
        config (dict): read here for connection_iam, filter_config, active_filters,
            aardvark_api_location, query_role_data_in_batch and batch_processing_size
        hooks

    Returns:
        None
    """
    conn = config["connection_iam"]
    conn["account_number"] = account_number

    LOGGER.info(
        "Getting current role data for account {} (this may take a while for large accounts)"
        .format(account_number))

    role_data = get_account_authorization_details(filter="Role", **conn)
    role_data_by_id = {item["RoleId"]: item for item in role_data}

    # convert policies list to dictionary to maintain consistency with old call which returned a dict
    for data in role_data_by_id.values():
        data["RolePolicyList"] = {
            item["PolicyName"]: item["PolicyDocument"]
            for item in data["RolePolicyList"]
        }

    roles = Roles([Role(rd) for rd in role_data])

    active_roles = []
    LOGGER.info("Updating role data for account {}".format(account_number))
    for role in tqdm(roles):
        role.account = account_number
        current_policies = role_data_by_id[role.role_id]["RolePolicyList"]
        active_roles.append(role.role_id)
        roledata.update_role_data(dynamo_table, account_number, role, current_policies)

    LOGGER.info("Finding inactive roles in account {}".format(account_number))
    roledata.find_and_mark_inactive(dynamo_table, account_number, active_roles)

    LOGGER.info("Filtering roles")
    plugins = FilterPlugins()

    # Blocklist needs to know the current account
    filter_config = config["filter_config"]
    blocklist_filter_config = filter_config.get(
        "BlocklistFilter", filter_config.get("BlacklistFilter"))
    # Neither key may be configured; in that case there is no dict to annotate and
    # assigning into None would abort the whole scan.
    if blocklist_filter_config is not None:
        blocklist_filter_config["current_account"] = account_number

    for plugin_path in config.get("active_filters"):
        plugin_name = plugin_path.split(":")[1]
        if plugin_name == "ExclusiveFilter":
            # ExclusiveFilter plugin active; try loading its config. Also, it requires the current account, so add it.
            exclusive_filter_config = filter_config.get("ExclusiveFilter", {})
            exclusive_filter_config["current_account"] = account_number
        plugins.load_plugin(plugin_path,
                            config=config["filter_config"].get(plugin_name, None))

    for plugin in plugins.filter_plugins:
        filtered_list = plugin.apply(roles)
        class_name = plugin.__class__.__name__
        for filtered_role in filtered_list:
            LOGGER.info("Role {} filtered by {}".format(
                filtered_role.role_name, class_name))
            filtered_role.disqualified_by.append(class_name)

    for role in roles:
        set_role_data(dynamo_table, role.role_id,
                      {"DisqualifiedBy": role.disqualified_by})

    LOGGER.info(
        "Getting data from Aardvark for account {}".format(account_number))
    aardvark_data = get_aardvark_data(config["aardvark_api_location"],
                                      account_number=account_number)

    LOGGER.info("Updating roles with Aardvark data in account {}".format(
        account_number))
    for role in roles:
        try:
            role.aa_data = aardvark_data[role.arn]
        except KeyError:
            LOGGER.warning("Aardvark data not found for role: {} ({})".format(
                role.role_id, role.role_name))
        else:
            set_role_data(dynamo_table, role.role_id, {"AAData": role.aa_data})

    LOGGER.info(
        "Calculating repoable permissions and services for account {}".format(
            account_number))

    batch_processing = config.get("query_role_data_in_batch", False)
    batch_size = config.get("batch_processing_size", 100)
    roledata._calculate_repo_scores(
        roles,
        config["filter_config"]["AgeFilter"]["minimum_age"],
        hooks,
        batch_processing,
        batch_size,
    )
    for role in roles:
        LOGGER.debug(
            "Role {} in account {} has\nrepoable permissions: {}\nrepoable services: {}"
            .format(
                role.role_name,
                account_number,
                role.repoable_permissions,
                role.repoable_services,
            ))
        set_role_data(
            dynamo_table,
            role.role_id,
            {
                "TotalPermissions": role.total_permissions,
                "RepoablePermissions": role.repoable_permissions,
                "RepoableServices": role.repoable_services,
            },
        )

    LOGGER.info("Updating stats in account {}".format(account_number))
    roledata.update_stats(dynamo_table, roles, source="Scan")
def remove_permissions_from_role(
    account_number,
    permissions,
    role,
    role_id,
    dynamo_table,
    config,
    hooks,
    commit=False,
):
    """Strip the given permissions from a role's inline policies.

    The repoed policies are computed from the role's most recent policy version. Nothing
    is changed if they would exceed the size limit, and without `commit` the planned
    deletions/replacements are only logged. With `commit`, policies are deleted and
    replaced, the new policy version and the Repoed timestamp are stored, and the role
    data is refreshed with source "ManualPermissionRepo".

    Args:
        account_number (string)
        permissions (list<string>)
        role (Role object)
        role_id (string)
        dynamo_table
        config
        hooks
        commit (bool)

    Returns:
        None
    """
    latest_policy = role.policies[-1]["Policy"]
    repoed_policies, deleted_policy_names = roledata._get_repoed_policy(
        latest_policy, permissions)

    if inline_policies_size_exceeds_maximum(repoed_policies):
        LOGGER.error(
            "Policies would exceed the AWS size limit after repo for role: {} in account {}.  "
            "Please manually minify.".format(role.role_name, account_number))
        return

    # dry run: report what would happen and leave everything untouched
    if not commit:
        log_deleted_and_repoed_policies(
            deleted_policy_names, repoed_policies, role.role_name, account_number)
        return

    conn = config["connection_iam"]
    conn["account_number"] = account_number

    for policy_name in deleted_policy_names:
        delete_error = delete_policy(policy_name, role, account_number, conn)
        if delete_error:
            LOGGER.error(delete_error)

    if repoed_policies:
        replace_error = replace_policies(repoed_policies, role, account_number, conn)
        if replace_error:
            LOGGER.error(replace_error)

    current_policies = get_role_inline_policies(role.as_dict(), **conn) or {}
    roledata.add_new_policy_version(dynamo_table, role, current_policies, "Repo")

    repoed_at = datetime.datetime.utcnow().isoformat()
    set_role_data(dynamo_table, role.role_id, {"Repoed": repoed_at})
    update_repoed_description(role.role_name, **conn)
    partial_update_role_data(
        role,
        dynamo_table,
        account_number,
        config,
        conn,
        hooks,
        source="ManualPermissionRepo",
        add_no_repo=False,
    )
    LOGGER.info(
        "Successfully removed {permissions} from role: {role} in account {account_number}"
        .format(permissions=permissions,
                role=role.role_name,
                account_number=account_number))
def update_role_data(dynamo_table,
                     account_number,
                     role,
                     current_policy,
                     current_managed_policy,
                     source="Scan",
                     add_no_repo=True,
                     include_managed_policies=True):
    """
    Bring the stored record for a role in line with the inline and managed policies that
    were just observed, and return an updated Role model.

    - A role with no stored record gets its initial record created and is returned
      immediately.
    - Otherwise the newest stored inline and managed policy versions are each compared
      with the current ones; a new version is recorded for whichever differs and the
      permissions that were added are collected.
    - Tags are rewritten when they differ from the stored ones.
    - The no-repo permission list, the opt-out and the 'Refreshed' timestamp are updated.
    - The role is rebuilt from everything stored in Dynamo except CreateDate and
      DisqualifiedBy.

    Args:
        dynamo_table
        account_number
        role (Role): current role being updated
        current_policy (dict): representation of the current policy version
        current_managed_policy (dict): representation of the current managed policy versions
        source: Default 'Scan' but could be Repo, Rollback, etc
        add_no_repo (bool): when True, newly added permissions are passed on to the
            no-repo permission list
        include_managed_policies (bool): when True (and add_no_repo is set), newly added
            managed-policy permissions are passed on to the no-repo permission list too

    Returns:
        Role: copy of `role` updated with the stored data
    """
    stored_role = get_role_data(
        dynamo_table,
        role.role_id,
        fields=["OptOut", "Policies", "ManagedPolicies", "Tags"])

    # Role has never been seen before: store its first version and hand back the result
    if not stored_role:
        initial_data = store_initial_role_data(
            dynamo_table,
            role.arn,
            role.create_date,
            role.role_id,
            role.role_name,
            account_number,
            current_policy,
            current_managed_policy,
            role.tags,
        )
        initial_model = Role.parse_obj(initial_data)
        role = role.copy(update=initial_model.dict(exclude_unset=True))
        LOGGER.info("Added new role ({}): {}".format(role.role_id, role.arn))
        return role

    # is the policy list the same as the last we had?
    previous_inline = stored_role["Policies"][-1]["Policy"]
    added_inline_permissions = set()
    if current_policy != previous_inline:
        add_new_policy_version(dynamo_table, role, current_policy, source)
        LOGGER.info(
            "{} has different inline policies than last time, adding to role store"
            .format(role.arn))
        added_inline_permissions = find_newly_added_permissions(
            previous_inline, current_policy)

    # TODO Make this part of set_role_data instead to allow updating existing dynamo tables
    # TODO this code will not work with existing dynamo tables - because old roles won't have ManagedPolicies
    previous_managed = stored_role["ManagedPolicies"][-1]["Policy"]
    added_managed_permissions = set()
    if current_managed_policy != previous_managed:
        add_new_managed_policy_version(dynamo_table, role, current_managed_policy, source)
        LOGGER.info(
            "{} has different managed policies than last time, adding to role store"
            .format(role.arn))
        added_managed_permissions = find_newly_added_permissions(
            previous_managed, current_managed_policy)

    # update tags if needed
    if role.tags != stored_role.get("Tags", []):
        set_role_data(dynamo_table, role.role_id, {"Tags": role.tags})

    if add_no_repo:
        update_no_repo_permissions(dynamo_table, role, added_inline_permissions)
        if include_managed_policies:
            update_no_repo_permissions(dynamo_table, role, added_managed_permissions)
    update_opt_out(dynamo_table, role)

    refreshed_at = datetime.datetime.utcnow().isoformat()
    set_role_data(dynamo_table, role.role_id, {"Refreshed": refreshed_at})

    # Update all data from Dynamo except CreateDate (it's in the wrong format) and DQ_by (we're going to recalc)
    current_role_data = get_role_data(dynamo_table, role.role_id)
    for skipped_key in ("CreateDate", "DisqualifiedBy"):
        current_role_data.pop(skipped_key, None)

    # Create an updated Role model to be returned to the caller
    stored_model = Role.parse_obj(current_role_data)
    return role.copy(update=stored_model.dict(exclude_unset=True))
def partial_update_role_data(role,
                             dynamo_table,
                             account_number,
                             config,
                             conn,
                             hooks,
                             source,
                             add_no_repo=True):
    """
    Run a reduced role update, used to get an accurate count of repoable permissions
    after a rollback or repo.

    Refreshed here:
      - Policies
      - Aardvark data
      - Total permissions
      - Repoable permissions
      - Repoable services
      - Stats

    Left alone:
      - Filters
      - Active/inactive roles

    Args:
        role (Role)
        dynamo_table
        account_number
        config
        conn (dict)
        hooks
        source: repo, rollback, etc
        add_no_repo: if set to True newly discovered permissions will be added to no repo list

    Returns:
        None
    """
    inline_policies = get_role_inline_policies(role.dict(), **conn) or {}
    update_role_data(
        dynamo_table,
        account_number,
        role,
        inline_policies,
        source=source,
        add_no_repo=add_no_repo,
    )

    aardvark_data = get_aardvark_data(config["aardvark_api_location"], arn=role.arn)
    # without Aardvark data there is nothing further to recalculate
    if not aardvark_data:
        return

    batch_processing = config.get("query_role_data_in_batch", False)
    batch_size = config.get("batch_processing_size", 100)

    role.aa_data = aardvark_data[role.arn]

    minimum_age = config["filter_config"]["AgeFilter"]["minimum_age"]
    _calculate_repo_scores([role], minimum_age, hooks, batch_processing, batch_size)

    recalculated = {
        "AAData": role.aa_data,
        "TotalPermissions": role.total_permissions,
        "RepoablePermissions": role.repoable_permissions,
        "RepoableServices": role.repoable_services,
    }
    set_role_data(dynamo_table, role.role_id, recalculated)

    # TODO update
    update_stats(dynamo_table, [role], source=source)
def _repo_role(
    account_number,
    role_name,
    dynamo_table,
    config,
    hooks,
    commit=False,
    scheduled=False,
):
    """
    Calculate what repoing can be done for a role and then actually do it if commit is set
      1) Check that a role exists, it isn't being disqualified by a filter, and that is has fresh AA data
      2) Get the role's current permissions, repoable permissions, and the new policy if it will change
      3) Make the changes if commit is set

    Whenever the role turns out not to be repoable its schedule is cleared
    (RepoScheduled = 0, ScheduledPerms = []).

    Args:
        account_number (string)
        role_name (string)
        dynamo_table
        config (dict): read for "repo_requirements"/"oldest_aa_data_days",
            "filter_config"/"AgeFilter"/"minimum_age" and "connection_iam"
        hooks: passed to the repoable permission calculation and to the AFTER_REPO hook call
        commit (bool): when False only log what would be deleted/replaced
        scheduled (bool): when True restrict repoable permissions to role.scheduled_perms

    Returns:
        list of error strings when a repo was attempted (empty on success) or when the
        repoed policies would exceed the AWS size limit; None when the role was not
        found, failed a precondition, or commit is False
    """
    errors = []

    role_id = find_role_in_cache(dynamo_table, account_number, role_name)
    # only load partial data that we need to determine if we should keep going
    role_data = get_role_data(
        dynamo_table,
        role_id,
        fields=["DisqualifiedBy", "AAData", "RepoablePermissions", "RoleName"],
    )
    if not role_data:
        LOGGER.warning("Could not find role with name {}".format(role_name))
        return

    role = Role(role_data)
    continuing = True

    if role.disqualified_by:
        LOGGER.info(
            "Cannot repo role {} in account {} because it is being disqualified by: {}"
            .format(role_name, account_number, role.disqualified_by))
        continuing = False

    if not role.aa_data:
        LOGGER.warning("ARN not found in Access Advisor: {}".format(role.arn))
        continuing = False

    if not role.repoable_permissions:
        LOGGER.info("No permissions to repo for role {} in account {}".format(
            role_name, account_number))
        continuing = False

    # load the rest of the role; the full record is needed even when we are about
    # to bail out because unscheduling is keyed on role.role_id
    role = Role(get_role_data(dynamo_table, role_id))

    old_aa_data_services = []
    # aa_data may be missing entirely (reported above), so only inspect it when present
    if role.aa_data:
        oldest_allowed = datetime.datetime.now() - datetime.timedelta(
            days=config["repo_requirements"]["oldest_aa_data_days"])
        old_aa_data_services = [
            aa_service["serviceName"]
            for aa_service in role.aa_data
            if datetime.datetime.strptime(
                aa_service["lastUpdated"], "%a, %d %b %Y %H:%M:%S %Z"
            ) < oldest_allowed
        ]

    if old_aa_data_services:
        LOGGER.error(
            "AAData older than threshold for these services: {} (role: {}, account {})"
            .format(old_aa_data_services, role_name, account_number))
        continuing = False

    # if we aren't repoing for some reason, unschedule the role; do it before the
    # permission calculation, which needs the data the checks above found missing
    if not continuing:
        set_role_data(dynamo_table, role.role_id, {
            "RepoScheduled": 0,
            "ScheduledPerms": []
        })
        return

    _, eligible_permissions = roledata._get_role_permissions(role)
    repoable_permissions = roledata._get_repoable_permissions(
        account_number,
        role.role_name,
        eligible_permissions,
        role.aa_data,
        role.no_repo_permissions,
        config["filter_config"]["AgeFilter"]["minimum_age"],
        hooks,
    )

    # if this is a scheduled repo we need to filter out permissions that weren't previously scheduled
    if scheduled:
        repoable_permissions = roledata._filter_scheduled_repoable_perms(
            repoable_permissions, role.scheduled_perms)

    repoed_policies, deleted_policy_names = roledata._get_repoed_policy(
        role.policies[-1]["Policy"], repoable_permissions)

    if inline_policies_size_exceeds_maximum(repoed_policies):
        error = (
            "Policies would exceed the AWS size limit after repo for role: {} in account {}.  "
            "Please manually minify.".format(role_name, account_number))
        LOGGER.error(error)
        errors.append(error)
        # this role can't be repoed automatically, so unschedule it and hand the
        # collected error back to the caller instead of dropping it
        set_role_data(dynamo_table, role.role_id, {
            "RepoScheduled": 0,
            "ScheduledPerms": []
        })
        return errors

    if not commit:
        log_deleted_and_repoed_policies(deleted_policy_names, repoed_policies,
                                        role_name, account_number)
        return

    conn = config["connection_iam"]
    conn["account_number"] = account_number

    for name in deleted_policy_names:
        error = delete_policy(name, role, account_number, conn)
        if error:
            LOGGER.error(error)
            errors.append(error)

    if repoed_policies:
        error = replace_policies(repoed_policies, role, account_number, conn)
        if error:
            LOGGER.error(error)
            errors.append(error)

    # record whatever the role's inline policies look like now as a new version
    current_policies = get_role_inline_policies(role.as_dict(), **conn) or {}
    roledata.add_new_policy_version(dynamo_table, role, current_policies,
                                    "Repo")

    # regardless of whether we're successful we want to unschedule the repo
    set_role_data(dynamo_table, role.role_id, {
        "RepoScheduled": 0,
        "ScheduledPerms": []
    })

    repokid.hooks.call_hooks(hooks, "AFTER_REPO", {
        "role": role,
        "errors": errors
    })

    if not errors:
        # repos will stay scheduled until they are successful
        set_role_data(
            dynamo_table,
            role.role_id,
            {"Repoed": datetime.datetime.utcnow().isoformat()},
        )
        update_repoed_description(role.role_name, **conn)
        partial_update_role_data(
            role,
            dynamo_table,
            account_number,
            config,
            conn,
            hooks,
            source="Repo",
            add_no_repo=False,
        )
        LOGGER.info("Successfully repoed role: {} in account {}".format(
            role.role_name, account_number))
    return errors
def repo_role(account_number, role_name, dynamo_table, config, hooks, commit=False, scheduled=False):
    """
    Calculate what repoing can be done for a role and then actually do it if commit is set
      1) Check that a role exists, it isn't being disqualified by a filter, and that is has fresh AA data
      2) Get the role's current permissions, repoable permissions, and the new policy if it will change
      3) Make the changes if commit is set

    Args:
        account_number (string)
        role_name (string)
        dynamo_table
        config (dict): read for "repo_requirements"/"oldest_aa_data_days",
            "filter_config"/"AgeFilter"/"minimum_age" and "connection_iam"
        hooks: passed to the repoable permission calculation and to the AFTER_REPO hook call
        commit (bool): when False only log what would be deleted/replaced
        scheduled (bool): when True restrict repoable permissions to role.scheduled_perms

    Returns:
        list of error strings when a repo was attempted (empty on success) or when the
        repoed policies would exceed the AWS size limit; None when the role was not
        found, failed a precondition, or commit is False
    """
    errors = []

    role_id = find_role_in_cache(dynamo_table, account_number, role_name)
    # only load partial data that we need to determine if we should keep going
    role_data = get_role_data(dynamo_table, role_id, fields=['DisqualifiedBy', 'AAData', 'RepoablePermissions',
                                                             'RoleName'])
    if not role_data:
        LOGGER.warning('Could not find role with name {}'.format(role_name))
        return

    role = Role(role_data)

    if role.disqualified_by:
        LOGGER.info('Cannot repo role {} in account {} because it is being disqualified by: {}'.format(
            role_name,
            account_number,
            role.disqualified_by))
        return

    if not role.aa_data:
        LOGGER.warning('ARN not found in Access Advisor: {}'.format(role.arn))
        return

    if not role.repoable_permissions:
        LOGGER.info('No permissions to repo for role {} in account {}'.format(role_name, account_number))
        return

    # if we've gotten to this point, load the rest of the role
    role = Role(get_role_data(dynamo_table, role_id))

    # the cutoff is the same for every service, so work it out once
    oldest_allowed = (datetime.datetime.now() -
                      datetime.timedelta(days=config['repo_requirements']['oldest_aa_data_days']))
    old_aa_data_services = [
        aa_service['serviceName']
        for aa_service in role.aa_data
        if datetime.datetime.strptime(aa_service['lastUpdated'], '%a, %d %b %Y %H:%M:%S %Z') < oldest_allowed
    ]

    if old_aa_data_services:
        LOGGER.error('AAData older than threshold for these services: {} (role: {}, account {})'.format(
            old_aa_data_services,
            role_name,
            account_number))
        return

    permissions = roledata._get_role_permissions(role)
    repoable_permissions = roledata._get_repoable_permissions(account_number, role.role_name, permissions,
                                                              role.aa_data, role.no_repo_permissions,
                                                              config['filter_config']['AgeFilter']['minimum_age'],
                                                              hooks)

    # if this is a scheduled repo we need to filter out permissions that weren't previously scheduled
    if scheduled:
        repoable_permissions = roledata._filter_scheduled_repoable_perms(repoable_permissions, role.scheduled_perms)

    repoed_policies, deleted_policy_names = roledata._get_repoed_policy(role.policies[-1]['Policy'],
                                                                        repoable_permissions)

    policies_length = len(json.dumps(repoed_policies))

    if policies_length > MAX_AWS_POLICY_SIZE:
        error = ("Policies would exceed the AWS size limit after repo for role: {} in account {}.  "
                 "Please manually minify.".format(role_name, account_number))
        LOGGER.error(error)
        errors.append(error)
        # hand the collected error back to the caller instead of dropping it
        return errors

    if not commit:
        for name in deleted_policy_names:
            LOGGER.info('Would delete policy from {} with name {} in account {}'.format(
                role_name,
                name,
                account_number))
        if repoed_policies:
            LOGGER.info('Would replace policies for role {} with: \n{} in account {}'.format(
                role_name,
                json.dumps(repoed_policies, indent=2, sort_keys=True),
                account_number))
        return

    conn = config['connection_iam']
    conn['account_number'] = account_number

    for name in deleted_policy_names:
        LOGGER.info('Deleting policy with name {} from {} in account {}'.format(name, role.role_name, account_number))
        try:
            delete_role_policy(RoleName=role.role_name, PolicyName=name, **conn)
        except botocore.exceptions.ClientError as e:
            # keep going: a failed delete is reported but must not stop the remaining changes
            error = 'Error deleting policy: {} from role: {} in account {}. Exception: {}'.format(
                name,
                role.role_name,
                account_number,
                e)
            LOGGER.error(error)
            errors.append(error)

    if repoed_policies:
        LOGGER.info('Replacing Policies With: \n{} (role: {} account: {})'.format(
            json.dumps(repoed_policies, indent=2, sort_keys=True),
            role.role_name,
            account_number))

        for policy_name, policy in repoed_policies.items():
            try:
                put_role_policy(RoleName=role.role_name, PolicyName=policy_name,
                                PolicyDocument=json.dumps(policy, indent=2, sort_keys=True),
                                **conn)

            except botocore.exceptions.ClientError as e:
                error = 'Exception calling PutRolePolicy on {role}/{policy} in account {account}\n{e}\n'.format(
                    role=role.role_name, policy=policy_name, account=account_number, e=str(e))
                LOGGER.error(error)
                errors.append(error)

    # record whatever the role's inline policies look like now as a new version
    current_policies = get_role_inline_policies(role.as_dict(), **conn) or {}
    roledata.add_new_policy_version(dynamo_table, role, current_policies, 'Repo')

    # regardless of whether we're successful we want to unschedule the repo
    set_role_data(dynamo_table, role.role_id, {'RepoScheduled': 0, 'ScheduledPerms': []})

    repokid.hooks.call_hooks(hooks, 'AFTER_REPO', {'role': role})

    if not errors:
        # repos will stay scheduled until they are successful
        set_role_data(dynamo_table, role.role_id, {'Repoed': datetime.datetime.utcnow().isoformat()})
        _update_repoed_description(role.role_name, **conn)
        _update_role_data(role, dynamo_table, account_number, config, conn, hooks, source='Repo', add_no_repo=False)
        LOGGER.info('Successfully repoed role: {} in account {}'.format(role.role_name, account_number))
    return errors
def update_role_cache(account_number, dynamo_table, config, hooks):
    """
    Update data about all roles in a given account:
      1) list all the roles and initiate a role object with basic data including name and roleID
      2) get inline policies for each of the roles
      3) build a list of active roles - we'll want to keep data about roles that may have been deleted in case we
         need to restore them, so if we used to have a role and now we don't see it we'll mark it inactive
      4) update data about the roles in Dynamo
      5) mark inactive roles in Dynamo
      6) load and instantiate filter plugins
      7) for each filter determine the list of roles that it filters
      8) update data in Dynamo about filters
      9) get Aardvark data for each role
      10) update Dynamo with Aardvark data
      11) calculate repoable permissions/policies for all the roles
      12) update Dynamo with information about how many total and repoable permissions and which services are repoable
      13) update stats in Dynamo with basic information like total permissions and which filters are applicable

    Args:
        account_number (string): The current account number Repokid is being run against
        dynamo_table
        config (dict): read for "connection_iam", "filter_config", "active_filters" and
            "aardvark_api_location"; "connection_iam" and the BlacklistFilter config are
            updated in place with the account number
        hooks: passed to the repoable permission calculation

    Returns:
        None
    """
    conn = config['connection_iam']
    conn['account_number'] = account_number

    LOGGER.info('Getting current role data for account {} (this may take a while for large accounts)'.format(
        account_number))
    role_data = get_account_authorization_details(filter='Role', **conn)
    role_data_by_id = {item['RoleId']: item for item in role_data}

    # convert policies list to dictionary to maintain consistency with old call which returned a dict
    for data in role_data_by_id.values():
        data['RolePolicyList'] = {item['PolicyName']: item['PolicyDocument'] for item in data['RolePolicyList']}

    roles = Roles([Role(rd) for rd in role_data])

    active_roles = []
    LOGGER.info('Updating role data for account {}'.format(account_number))
    for role in tqdm(roles):
        role.account = account_number
        current_policies = role_data_by_id[role.role_id]['RolePolicyList']
        active_roles.append(role.role_id)
        roledata.update_role_data(dynamo_table, account_number, role, current_policies)

    LOGGER.info('Finding inactive roles in account {}'.format(account_number))
    roledata.find_and_mark_inactive(dynamo_table, account_number, active_roles)

    LOGGER.info('Filtering roles')
    plugins = FilterPlugins()

    # Blacklist needs to know the current account
    config['filter_config']['BlacklistFilter']['current_account'] = account_number

    for plugin_path in config.get('active_filters'):
        # the plugin's config section is keyed by the part of the path after the colon
        plugin_name = plugin_path.split(':')[1]
        plugins.load_plugin(plugin_path, config=config['filter_config'].get(plugin_name, None))

    for plugin in plugins.filter_plugins:
        filtered_list = plugin.apply(roles)
        class_name = plugin.__class__.__name__
        for filtered_role in filtered_list:
            LOGGER.info('Role {} filtered by {}'.format(filtered_role.role_name, class_name))
            filtered_role.disqualified_by.append(class_name)

    for role in roles:
        set_role_data(dynamo_table, role.role_id, {'DisqualifiedBy': role.disqualified_by})

    LOGGER.info('Getting data from Aardvark for account {}'.format(account_number))
    aardvark_data = _get_aardvark_data(config['aardvark_api_location'], account_number=account_number)

    LOGGER.info('Updating roles with Aardvark data in account {}'.format(account_number))
    for role in roles:
        try:
            role.aa_data = aardvark_data[role.arn]
        except KeyError:
            # a role without Aardvark data is only warned about; nothing is written for it
            LOGGER.warning('Aardvark data not found for role: {} ({})'.format(role.role_id, role.role_name))
        else:
            set_role_data(dynamo_table, role.role_id, {'AAData': role.aa_data})

    LOGGER.info('Calculating repoable permissions and services for account {}'.format(account_number))
    roledata._calculate_repo_scores(roles, config['filter_config']['AgeFilter']['minimum_age'], hooks)
    for role in roles:
        # one placeholder per argument so the repoable services actually appear in the message
        LOGGER.debug('Role {} in account {} has\nrepoable permissions: {}\nrepoable services: {}'.format(
            role.role_name,
            account_number,
            role.repoable_permissions,
            role.repoable_services
        ))
        set_role_data(dynamo_table, role.role_id, {'TotalPermissions': role.total_permissions,
                                                   'RepoablePermissions': role.repoable_permissions,
                                                   'RepoableServices': role.repoable_services})

    LOGGER.info('Updating stats in account {}'.format(account_number))
    roledata.update_stats(dynamo_table, roles, source='Scan')