class RulesAccessor:
    """Loads scan rules from YAML files on disk and serves them by rule id."""

    logger = get_logger(__name__)
    # Directory holding the rule YAML files, located next to this module.
    one_time_folder = os.path.join(
        str(Path(os.path.realpath(__file__)).parent), 'rules')

    def __init__(self):
        self.all_rules = {}
        self.read_all_rules()

    def read_all_rules(self):
        """Populate ``self.all_rules``, keyed by each rule's 'rule_id' field."""
        # Possible reading from DB in future. Reading from filesystem for now.
        self.logger.debug('Reading Rules ..')
        for yaml_file in os.listdir(self.one_time_folder):
            rule_path = os.path.join(self.one_time_folder, yaml_file)
            with open(rule_path) as fp:
                y_contents = yaml.load(fp, Loader=yaml.BaseLoader)
            self.all_rules[y_contents['rule_id']] = y_contents

    def get_rule_by_id(self, rule_id, suppress_exception=False):
        """Return the rule dict registered under ``rule_id``.

        :param rule_id: id of the rule to look up
        :param suppress_exception: when truthy, return None for an unknown
            rule instead of raising
        :raises ApplicationException: when the rule is unknown and
            ``suppress_exception`` is falsy
        """
        rule = self.all_rules.get(rule_id)
        # Guard clause: hand back the rule (or None when suppressed).
        if rule or suppress_exception:
            return rule
        self.logger.error(f'Rule {rule_id} not Found !')
        self.logger.error(f"Valid rules are {list(self.all_rules.keys())}")
        raise ApplicationException(f"Rule {rule_id} not Found !!")
def __init__(self, project_id, **kwargs):
    # TODO: Add authentication check here
    # TODO: Add common object instantiation
    # TODO: fetch the default project from the APPLICATION CREDENTIALS JSON
    self.logger = get_logger(__name__)
    self.project_id = project_id
    # Application-default credentials carrying the broad cloud-platform scope.
    cloud_platform_scopes = ['https://www.googleapis.com/auth/cloud-platform']
    self.credentials, self.default_project_id = default(
        scopes=cloud_platform_scopes)
    self.session = AuthorizedSession(self.credentials)
class GcloudSqlAdmin(GcloudRestLibBase):
    """Thin wrapper around the Cloud SQL Admin (sqladmin v1beta4) API."""

    # Fixed: was get_logger('compute') — apparently copy-pasted from a compute
    # wrapper; the sibling service wrappers all log under their own module name.
    logger = get_logger(__name__)
    # Seconds to sleep between polls of long-running operations.
    poll_sleep_time_in_secs = 1

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.sqladmin_instance = googleapiclient.discovery.build(
            'sqladmin', 'v1beta4', cache_discovery=False)

    def list_sql_instances(self):
        """Return the raw API response listing all Cloud SQL instances in the project."""
        return self.sqladmin_instance.instances().list(
            project=self.project_id).execute()
class GcloudRedisV1(GcloudRestLibBase):
    """Thin wrapper around the Cloud Memorystore (redis v1) API."""

    logger = get_logger(__name__)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.redis_service = googleapiclient.discovery.build(
            'redis', 'v1', cache_discovery=False)

    def list_all_redis_instances(self):
        """Return the raw API response listing Redis instances in every location."""
        # '-' is the API wildcard meaning "all locations".
        all_locations_parent = f'projects/{self.project_id}/locations/-'
        instances_api = self.redis_service.projects().locations().instances()
        return instances_api.list(parent=all_locations_parent).execute()
class GcloudStorageV1(GcloudRestLibBase):
    """Thin wrapper around the Cloud Storage (storage v1) JSON API."""

    logger = get_logger(__name__)

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.storage_service = googleapiclient.discovery.build(
            'storage', 'v1', cache_discovery=False)

    def list_all_storage_buckets(self):
        """Return the raw API response listing every bucket of the project."""
        buckets_api = self.storage_service.buckets()
        return buckets_api.list(project=self.project_id).execute()

    def list_all_objects(self, bucket_name):
        """Return the raw API response listing the objects inside ``bucket_name``."""
        objects_api = self.storage_service.objects()
        return objects_api.list(bucket=bucket_name).execute()
def __init__(self, project_id):
    """Bind a logger and a Gcloud client for ``project_id``.

    :param project_id: GCP project the Gcloud facade should operate on
    """
    # Fixed: the original used __file__.replace('py', ''), which strips
    # EVERY 'py' substring anywhere in the path (e.g. '/python/app.py'
    # becomes '/thon/app.') and leaves a trailing dot. rsplit removes only
    # the final extension.
    self.logger = get_logger(__file__.rsplit('.', 1)[0])
    self.gcloud_lib = Gcloud(project_id=project_id)
class DependencyResolver:
    """Resolves the chain of GCP resources that depend on a given resource.

    Each resource type <T> is handled by a method named
    ``dependency_resolver__<T>``; ``resolve_dependencies`` dispatches on that
    naming convention. Resolvers return a "stack": a list of selfLinks with
    the queried resource first, followed by every discovered dependent.
    """

    logger = get_logger('dependency_resolver')

    def __init__(self, project_id):
        self.gcloud_lib = Gcloud(project_id=project_id)

    def resolve_dependencies(self, resource_type, resource_id):
        """Dispatch to the type-specific resolver for ``resource_type``.

        :param resource_type: e.g. 'compute_v1_instances'
        :param resource_id: selfLink of the resource
        :return: list of selfLinks (the dependency stack)
        """
        resolver_function = getattr(self,
                                    f'dependency_resolver__{resource_type}')
        return resolver_function(resource_id=resource_id)

    def dependency_resolver__sql_v1beta4_instances(self, resource_id):
        # Cloud SQL instances are treated as standalone: no dependents.
        return [resource_id]

    def dependency_resolver__redis_v1_instances(self, resource_id):
        # Redis instances are treated as standalone: no dependents.
        return [resource_id]

    def dependency_resolver__storage_v1_b(self, resource_id):
        """Resolve a storage bucket: every object inside is a dependent."""
        # check if there are any objects inside and add them all as dependencies
        all_objects_in_bucket = self.gcloud_lib.list_all_objects(
            bucket_name=parse_link(self_link=resource_id)['resource_name'])
        return [resource_id] + [
            obj['selfLink'] for obj in all_objects_in_bucket.get('items', [])
        ]

    def dependency_resolver__compute_v1_instances(self, resource_id):
        """Resolve a compute instance via the API's referrers listing.

        :param resource_id: selfLink of a compute instance
        :return: dependency stack; empty list when the instance is already
            gone (API answers 404)
        """
        # The stack to return at end
        dependency_stack = [resource_id]
        instance_self_link = resource_id
        self_link_values = parse_link(self_link=instance_self_link,
                                      extra_expected_values=['instances'])
        # TODO: Handle case when self_link parsing fails. Raise custom exception
        zone = self_link_values.get('zones')
        instance_name = self_link_values.get('instances')
        # Get list of resources referring to this instance (dependencies)
        try:
            instance_referrers = self.gcloud_lib.list_referrers_of_instance(
                zone=zone, instance=instance_name)
        except googleapiclient.errors.HttpError as ex:
            if int(ex.resp['status']) == 404:
                self.logger.debug(
                    'Assuming 404 to be resource already deleted')
                return []
            else:
                # Fixed: bare raise preserves the original traceback
                # (was `raise ex`).
                raise
        # If there are no dependencies, the instance is independent
        if 'items' not in instance_referrers:
            return dependency_stack
        # If there are dependencies:
        for referrer_details in instance_referrers['items']:
            # TODO: Handle if resource type could not be guessed
            # 1. Get the dependency type
            referrer_resource_type = get_resource_type(
                self_link=referrer_details['referrer'])
            # 2. Dynamically determine and call the function that handles
            # this type of resource
            function_to_resolve = getattr(
                self, f'dependency_resolver__{referrer_resource_type}', None)
            if not function_to_resolve:
                # Fixed: use the class logger instead of print().
                self.logger.warning(
                    f'Dont know how to resolve referrer of type {referrer_resource_type}'
                )
                continue
            referrer_stack = function_to_resolve(referrer_details['referrer'])
            # 3. If the function returns its own stack, push to existing stack
            if referrer_stack:
                dependency_stack.extend(referrer_stack)
        return dependency_stack

    def dependency_resolver__compute_v1_instanceGroups(self, resource_id):
        """
        :param resource_id: Expected to be selfLink of an instanceGroup
        :return: List of dependent resources. List containing self if no dependents
        """
        to_return_stack = [resource_id]
        # Since we don't have a direct API that can give instance groups
        # We shall have to check all possible places where an InstanceGroup
        # can be referred
        # 1. Instance Group Managers
        all_instance_group_managers = self.gcloud_lib.get_all_instance_group_managers(
        )
        if 'items' not in all_instance_group_managers:
            return to_return_stack
        for reg_zone, region_zone in all_instance_group_managers[
                'items'].items():
            if 'instanceGroupManagers' not in region_zone:
                continue
            for instance_group_manager in region_zone['instanceGroupManagers']:
                if instance_group_manager.get('instanceGroup') == resource_id:
                    to_return_stack.append(instance_group_manager['selfLink'])
        # 2. Backend Services
        all_backend_services_info = self.gcloud_lib.get_all_backend_services()
        # There are no backend services in the entire project
        if 'items' not in all_backend_services_info:
            return to_return_stack
        # Looping over resources in each region
        for region, region_info in all_backend_services_info['items'].items(
        ):  # region -> can be 'global' as well
            # There are no backend services in this region
            if 'backendServices' not in region_info:
                continue
            # Looping over every backend service in a region
            for backend_service_info in region_info['backendServices']:
                # Not concerned about external load balancers here
                if not ('loadBalancingScheme' in backend_service_info
                        and 'INTERNAL'
                        in backend_service_info['loadBalancingScheme']):
                    continue
                if 'backends' not in backend_service_info:
                    continue
                for backend_info in backend_service_info['backends']:
                    # This backend actually refers to the required instanceGroup
                    if 'group' in backend_info and backend_info[
                            'group'] == resource_id:
                        referrer_resource_id = backend_service_info['selfLink']
                        # Get resource type. Expected: backendService
                        referrer_resource_type = get_resource_type(
                            self_link=referrer_resource_id)
                        # Check if we have a function that can further resolve
                        # dependencies
                        function_to_resolve = getattr(
                            self,
                            f'dependency_resolver__{referrer_resource_type}',
                            None)
                        if not function_to_resolve:
                            # Fixed: use the class logger instead of print().
                            self.logger.warning(
                                f'Dont know how to resolve referrer of type {referrer_resource_type}'
                            )
                            # At least add to stack what we have discovered
                            to_return_stack.append(referrer_resource_id)
                            continue
                        # Call the corresponding function to resolve further
                        # dependencies
                        referrer_stack = function_to_resolve(
                            referrer_resource_id)
                        if referrer_stack:
                            to_return_stack.extend(referrer_stack)
                        break
        return to_return_stack

    def dependency_resolver__compute_v1_backendServices(self, resource_id):
        """ Check for resources that refer this backendService and return them
        :param resource_id: Expected to be selfLink of BackendService
        :return: List
        """
        to_return_stack = [resource_id]
        # 2. Checking all URL Maps where this is referred
        # TODO: Based on the load balancing scheme of the BackendService determine
        # if it can be referred global URL MAP or a local url map.
        # LoadBalancingScheme => EXTERNAL & INTERNAL_SELF_MANAGED -> Global || INTERNAL_MANAGED -> Regional
        self_link_values = parse_link(self_link=resource_id)
        all_url_maps = self.gcloud_lib.get_all_regional_url_maps(
            region=self_link_values['regions'])
        for url_map in all_url_maps.get('items', []):
            if url_map.get('defaultService', None) == resource_id:
                referrer_resource_id = url_map['selfLink']
                referrer_resource_type = get_resource_type(
                    self_link=referrer_resource_id)
                # Check if we have a function that can further resolve dependencies
                function_to_resolve = getattr(
                    self, f'dependency_resolver__{referrer_resource_type}',
                    None)
                if not function_to_resolve:
                    # Fixed: use the class logger instead of print().
                    self.logger.warning(
                        f'Dont know how to resolve referrer of type {referrer_resource_type}'
                    )
                    # At least add to stack what we have discovered
                    to_return_stack.append(referrer_resource_id)
                    continue
                # Call the corresponding function to resolve further dependencies
                referrer_stack = function_to_resolve(referrer_resource_id)
                if referrer_stack:
                    to_return_stack.extend(referrer_stack)
        # 1. Checking Forwarding Rules where this backend service is listed
        all_forwarding_rules = self.gcloud_lib.get_all_forwarding_rules()
        if 'items' not in all_forwarding_rules:
            return to_return_stack
        for region, region_forwarding_rules_info in all_forwarding_rules[
                'items'].items():
            # There are no forwarding rules in this region
            if 'forwardingRules' not in region_forwarding_rules_info:
                continue
            for forwarding_rule in region_forwarding_rules_info[
                    'forwardingRules']:
                if 'backendService' in forwarding_rule and forwarding_rule[
                        'backendService'] == resource_id:
                    to_return_stack.append(forwarding_rule['selfLink'])
        return to_return_stack

    def dependency_resolver__compute_v1_urlMaps(self, resource_id):
        """Resolve a URL map by finding the target HTTP proxies that use it.

        :param resource_id: Expected to be selfLink of a urlMap
        :return: List
        """
        to_return_stack = [resource_id]
        # 1. Checking http Proxies where this url map is listed
        self_link_values = parse_link(self_link=resource_id,
                                      extra_expected_values=['urlMaps'])
        if 'global' in self_link_values:
            all_target_http_proxies_maps = self.gcloud_lib.get_all_global_http_proxies(
            )
        elif 'regions' in self_link_values:
            all_target_http_proxies_maps = self.gcloud_lib.get_all_regional_http_proxies(
                region=self_link_values['regions'])
        else:
            self.logger.warning(
                'Could not resolve dependency for the URL MAP {}'.format(
                    resource_id))
            return to_return_stack
        for http_proxy in all_target_http_proxies_maps.get('items', []):
            if http_proxy.get('urlMap') == resource_id:
                to_return_stack.append(http_proxy['selfLink'])
        return to_return_stack
class FilterLib:
    """Evaluates rule filters against candidate resource attributes.

    Each filter type <T> is handled by a method named
    ``filter_handler__<t>`` (lower-cased); ``run_filter`` dispatches on
    that naming convention.
    """

    logger = get_logger("filter_lib")

    def run_filter(self, filter_type, filter_data, candidate):
        """Evaluate one filter against a candidate value.

        :param filter_type: filter type name from the rule YAML (case-insensitive)
        :param filter_data: filter configuration dict from the rule YAML
        :param candidate: value under test; its shape depends on the filter type
        :return: bool
        :raises ApplicationException: when no handler exists for ``filter_type``
        """
        filter_function_name = f'filter_handler__{filter_type.lower()}'
        filter_function = getattr(self, filter_function_name, None)
        # Fixed (was a TODO): fail loudly on an unknown filter type instead
        # of crashing with a TypeError when calling None.
        if filter_function is None:
            self.logger.error(
                f'Illegal filter type {filter_type}. No handler {filter_function_name}')
            raise ApplicationException(f'Illegal filter type {filter_type}')
        return filter_function(filter_data, candidate)

    def summarise_all_filters_of_rule(self, rule_filter_evaluation_status,
                                      filter_join_rule):
        """Combine per-filter verdicts using the rule's boolean join expression.

        :param rule_filter_evaluation_status: dict of filter_id -> bool
        :param filter_join_rule: boolean expression over filter ids,
            e.g. 'filter_1 and filter_2'
        :return: bool
        """
        for filter_id, filter_status in rule_filter_evaluation_status.items():
            filter_join_rule = filter_join_rule.replace(
                filter_id, str(filter_status))
        # SECURITY NOTE: eval() of an expression sourced from the rule YAML.
        # Acceptable only while rule files are trusted, operator-authored input;
        # do not feed user-supplied join rules through this path.
        return bool(eval(filter_join_rule))

    # TODO: Put all filter handlers together
    def filter_handler__name(self, filter_data, candidate):
        """
        :param filter_data: from YAML
        :param candidate: string -> the name to search for in candidate
        :return: bool
        """
        name_regex = filter_data['name_regex']
        # In the present case the candidate is a string.
        # TODO: Document this contract centrally
        return bool(re.search(pattern=name_regex, string=candidate))

    def filter_handler__autodetect_declared_expiry(self, filter_data,
                                                   candidate):
        """
        :param filter_data: from yaml
        :param candidate: dict -> {'literals': list -> list of literals to consider, 'age' : int-> age in secs}
        :return: bool
        """
        filter_regex = filter_data['expected_regex']
        for literal in candidate['literals']:
            regex_search = re.search(pattern=filter_regex,
                                     string=literal,
                                     flags=re.IGNORECASE)
            if not regex_search:
                continue
            required_string = regex_search.group()  # e.g. expected : dnd_3
            digit_search_pattern = r'\d+'
            digit_search = re.search(pattern=digit_search_pattern,
                                     string=required_string)
            if not digit_search:
                # Fixed: logger.error, not logger.exception — we are not
                # inside an except block, so there is no active exception
                # for .exception() to attach.
                self.logger.error(
                    f'Illegal filter. The string {required_string} filtered using regex {filter_regex}'
                    f' does not have any digits so as age can be determined')
                raise ApplicationException(
                    'Illegal regex in filter with no digit.')
            required_digit = int(digit_search.group())  # expected e.g.: 3
            return self.filter_handler__age(filter_data={
                'age': required_digit,
                'unit': filter_data['digit_unit']
            }, candidate=candidate['age'])
        return False

    def filter_handler__age(self, filter_data, candidate):
        """
        :param filter_data: from YAML
        :param candidate: int -> Age of resource in seconds
        :return: bool
        """
        multiplier_to_secs = {'days': 24 * 60 * 60, 'hours': 60 * 60}
        amount = int(filter_data['age'])
        unit = filter_data['unit']
        # True when the candidate is strictly older than the configured age.
        return bool(candidate - amount * multiplier_to_secs[unit] > 0)

    def filter_handler__tag(self, filter_data, candidate):
        """
        :param filter_data: dict with a 'key' regex and an optional 'value' regex
        :param candidate: Expected: Dict of key value pairs. Accepts blank dict as well
        :return: bool
        """
        expected_key_regex = filter_data['key']
        expected_value_regex = filter_data.get('value', None)
        for actual_label_key, actual_label_value in candidate.items():
            if not re.search(pattern=expected_key_regex,
                             string=actual_label_key):
                continue
            # Key is passing. Yet to check if a value should pass as well
            if not expected_value_regex:
                # No value check specified.
                # Every rule can check for just 1 tag. So done with checking
                return True
            if re.search(pattern=expected_value_regex,
                         string=actual_label_value):
                return True
        return False
import argparse

from app.app import scan_resources, delete_scanned_resources
from library.utilities.logger import get_logger

logger = get_logger(__name__)

# Command-line interface for the scan/delete framework.
parser = argparse.ArgumentParser(
    description="GCP resource Scan and deletion framework")

# Positional action; defaults to a read-only scan when omitted.
parser.add_argument(
    "action",
    choices=['scan', 'delete'],
    metavar="action",
    type=str,
    nargs='?',
    default='scan',
    help='The action that you perform using the rule. scan or delete')

# One or more target projects (mandatory).
parser.add_argument(
    '--project_id',
    nargs="+",
    required=True,
    help='Your GCP project ID which you want to scan')

# One or more rule ids to apply.
parser.add_argument(
    '--rules',
    nargs="+",
    metavar='RULE_ID',
    default=['a_week_old_resources'],
    help='the rule IDs that you want to run')

arguments = parser.parse_args()