from checkov.serverless.runner import Runner as sls_runner
from checkov.terraform.plan_runner import Runner as tf_plan_runner
from checkov.terraform.runner import Runner as tf_graph_runner
from checkov.version import version

# Registry override hook: when set by an embedder/test before run() is called,
# run() reuses this registry instead of building a fresh one.
outer_registry = None

logging_init()
logger = logging.getLogger(__name__)

# Framework identifiers understood by the runner filter
# (--framework / --skip-framework values).
checkov_runners = [
    'cloudformation', 'terraform', 'kubernetes', 'serverless', 'arm',
    'terraform_plan', 'helm', 'dockerfile', 'secrets'
]

# One eagerly constructed instance of every built-in runner.
# NOTE(review): these are instantiated at import time — confirm all runner
# constructors are side-effect free.
DEFAULT_RUNNERS = (tf_graph_runner(), cfn_runner(), k8_runner(), sls_runner(),
                   arm_runner(), tf_plan_runner(), helm_runner(),
                   dockerfile_runner(), secrets_runner())


def run(banner=checkov_banner, argv=sys.argv[1:]):
    """CLI entry point: parse arguments (with YAML config-file support) and
    configure the shared HTTP manager for the Bridgecrew integration.

    :param banner: banner text used by downstream reporting code.
    :param argv: argument vector to parse. NOTE: the default is captured once
        at import time from sys.argv[1:].
    """
    # NOTE(review): config-file discovery always reads sys.argv[1:] rather
    # than the argv parameter — confirm this asymmetry is intentional.
    default_config_paths = get_default_config_paths(sys.argv[1:])
    parser = ExtArgumentParser(
        description='Infrastructure as code static analysis',
        default_config_files=default_config_paths,
        config_file_parser_class=configargparse.YAMLConfigFileParser,
        add_env_var_help=True)
    add_parser_args(parser)
    config = parser.parse_args(argv)
    # bridgecrew uses both the urllib3 and requests libraries, while checkov uses the requests library.
    # Allow the user to specify a CA bundle to be used by both libraries.
    bc_integration.setup_http_manager(config.ca_certificate)
def scan_files():
    """Crawl ArtifactHub repositories, download and extract each Helm chart,
    scan it with the checkov helm runner, and write per-repository CSV
    results plus a dependency graph image.

    Returns:
        tuple: (checks_table, summary_table, all_resources,
        empty_resources_total, all_dataobj).
        NOTE(review): checks_table, summary_table, all_resources and
        all_dataobj are never appended to here and are returned empty —
        the real output is the per-repo CSV written via result_writer;
        confirm callers do not rely on these return values.
    """
    crawler = artifactHubCrawler.ArtifactHubCrawler()
    # mockCrawl() yields a {index: repo-dict} mapping plus totals (used for
    # logging only).
    crawlDict, totalRepos, totalPackages = crawler.mockCrawl()
    logging.info(
        f"Crawl completed with {totalPackages} charts from {totalRepos} repositories."
    )
    checks_table = []
    summary_table = []
    all_resources = []
    empty_resources_total = {}
    all_dataobj = []
    extract_failures = []     # charts whose .tgz could not be unpacked
    download_failures = []    # charts whose content_url download failed
    parse_deps_failures = []  # NOTE(review): never appended to — dead?
    # Extracts the chart name from a checkov resource id such as
    # "path.RELEASE-NAME-chart.default"; group 3 is the chart name.
    # NOTE(review): non-raw string with backslash escapes — works today but a
    # raw string r'...' would avoid invalid-escape warnings.
    chartNameFromResultDataExpression = '(.*)\.(RELEASE-NAME-)?(.*)(\.default)?'
    chartNameFromResultDataExpressionGroup = 3
    for repoCount in crawlDict:
        repo = crawlDict[repoCount]
        # One directed dependency graph (and .png) per repository.
        depGraph = pgv.AGraph(strict=False, directed=True)
        summary_lst = []
        result_lst = []
        helmdeps_lst = []
        empty_resources = {}
        orgRepoFilename = f"{repo['repoName']}"
        for chartPackage in crawlDict[repoCount]['repoPackages']:
            repoChartPathName = f"{repo['repoName']}/{chartPackage['name']}"
            ## DEBUG: Disable specific repo for scanning
            #if orgRepoFilename == "reponame":
            #    continue
            if True:  # placeholder guard left over from the debug filter above
                logging.info(
                    f"Scanning {repo['repoName']}/{chartPackage['name']}| Download Source "
                )
                # Setup local dir and download
                repoChartPathName = f"{repo['repoName']}/{chartPackage['name']}"
                downloadPath = f'{RESULTS_PATH}/{repoChartPathName}'
                if not os.path.exists(downloadPath):
                    os.makedirs(downloadPath)
                try:
                    wget.download(chartPackage['content_url'], downloadPath)
                    # Unpack every downloaded tarball, then delete the archive.
                    for filename in glob.glob(f"{downloadPath}/**.tgz",
                                              recursive=False):
                        try:
                            extract(filename, downloadPath)
                            logging.info(
                                f"Scanning {repo['repoName']}/{chartPackage['name']}| Extract Source "
                            )
                            os.remove(filename)
                        except:  # NOTE(review): bare except hides real errors
                            logging.info(
                                f"Failed to extract {repo['repoName']}/{chartPackage['name']}"
                            )
                            extract_failures.append(
                                [f"{repo['repoName']}/{chartPackage['name']}"])
                            continue
                except:  # NOTE(review): bare except hides real errors
                    logging.info(
                        f"Failed to download {repo['repoName']}/{chartPackage['name']}"
                    )
                    download_failures.append(
                        [f"{repo['repoName']}/{chartPackage['name']}"])
                    continue
                logging.info(
                    f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Chart Deps"
                )
                # Ask the helm CLI for the chart's declared dependencies.
                proc = subprocess.Popen([
                    "helm", 'dependency', 'list',
                    f"{downloadPath}/{chartPackage['name']}"
                ],
                                        stdout=subprocess.PIPE,
                                        stderr=subprocess.PIPE)
                o, e = proc.communicate()
                if e:
                    # helm prints a warning (not a hard error) for v1 API
                    # charts that keep dependencies outside Chart.yaml.
                    if "Warning: Dependencies" in str(e, 'utf-8'):
                        logging.info(
                            f"V1 API chart without Chart.yaml dependancies. Skipping chart dependancy list for {chartPackage['name']} at dir: {downloadPath}/{chartPackage['name']}. Error details: {str(e, 'utf-8')}"
                        )
                    else:
                        logging.info(
                            f"Error processing helm dependancies for {chartPackage['name']} at source dir: {downloadPath}/{chartPackage['name']}. Error details: {str(e, 'utf-8')}"
                        )
                chart_deps = parse_helm_dependency_output(o)
                logging.info(chart_deps)
                #### GRAPH. INITIAL NODE AND DEPS
                if chart_deps:
                    depGraph.add_node(
                        repoChartPathName,
                        label=repoChartPathName,
                        color='green',
                        shape='circle'
                    )  # adds current repo to graph - ONLY if there are deps.
                    for key in chart_deps:
                        logging.info(f" GRAPHING DEPS FOR {chart_deps[key]}")
                        current_dep = chart_deps[key]
                        # NOTE(review): positional access into .values() assumes
                        # insertion order chart_name/chart_version/chart_repo/
                        # chart_status — confirm parse_helm_dependency_output
                        # always builds the dict in that order.
                        depCombinedRepoName = f'{list(current_dep.values())[2]}/{list(current_dep.values())[0]}'  #Consistent repo/chart naming for our dep
                        depRepoNameLabel = f'{list(current_dep.values())[0]}'
                        # Increment global deps tracking
                        globalDepsUsage[
                            depCombinedRepoName] = globalDepsUsage.get(
                                depCombinedRepoName, 0) + 1
                        globalDepsList[depCombinedRepoName].append(
                            repoChartPathName)
                        # Color the dep node by where it appears to live:
                        # blue = same repo, black = local files, red = remote.
                        nodecolor = "black"
                        if repo['repoName'] in list(current_dep.values())[2]:
                            logging.info(
                                f"{depCombinedRepoName} (repo name in dep url): is probably a local repo dep for {repoChartPathName} Coloring blue"
                            )
                            nodecolor = "blue"
                        elif "http" not in list(current_dep.values())[2]:
                            logging.info(
                                f"{depCombinedRepoName} (no url, local files): is probably a local repo dep for {repoChartPathName} leaving black"
                            )
                        else:
                            logging.info(
                                f"{depCombinedRepoName} is probably a remote repo dep for {repoChartPathName} Coloring red"
                            )
                            nodecolor = "red"
                        depGraph.add_node(
                            depCombinedRepoName,
                            label=depRepoNameLabel,
                            shape='circle',
                            color=nodecolor)  #create dep repo if non exist.
                        depGraph.add_edge(
                            repoChartPathName, depCombinedRepoName
                        )  # Link our repo to the dep repo, will only link to the same source (new node if helm repo is from a different source)
                """ dep_item = [
                    repoChartPathName, #Current chart combined repo/path
                    chartPackage['name'], #Current chart chartname
                    repo['repoName'], #Current chart reponame
                    chartPackage['version'], #Current chart version
                    list(current_dep.values())[0], #dep dict chart_name
                    list(current_dep.values())[1], #dep dict chart_version
                    list(current_dep.values())[2], #dep dict chart_repo
                    list(current_dep.values())[3] #dep dict chart_status
                ] """
                # Assign results_scan outside of try objects.
                results_scan = object
                try:
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Running Checkov"
                    )
                    runner = helm_runner()
                    results_scan = runner.run(root_folder=downloadPath,
                                              external_checks_dir=None,
                                              files=None)
                    res = results_scan.get_dict()
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Results"
                    )
                    # One CSV row per passed check.
                    for passed_check in res["results"]["passed_checks"]:
                        chartNameFromResultData = re.search(
                            chartNameFromResultDataExpression,
                            passed_check["resource"]).group(
                                chartNameFromResultDataExpressionGroup)
                        ## NEW. Default items if no key exists for non-critical components
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get('security_report_created_at',
                                             'no data'),
                            chartNameFromResultData,
                            chartPackage.get('is_operator', 'no data'),
                            # NOTE(review): str.lstrip strips a character SET,
                            # not a prefix — "CheckCategories." can also eat
                            # leading letters of the category name; confirm.
                            str(check_category(
                                passed_check["check_id"])).lstrip(
                                    "CheckCategories."),
                            passed_check["check_id"],
                            passed_check["check_name"],
                            passed_check["check_result"]["result"],
                            passed_check["file_path"],
                            passed_check["check_class"],
                            passed_check["resource"].split(".")[0],
                            repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]
                        result_lst.append(check)
                    # One CSV row per failed check (same column layout).
                    for failed_check in res["results"]["failed_checks"]:
                        chartNameFromResultData = re.search(
                            chartNameFromResultDataExpression,
                            failed_check["resource"]).group(
                                chartNameFromResultDataExpressionGroup)
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get('security_report_created_at',
                                             'no data'),
                            chartNameFromResultData,
                            chartPackage.get('is_operator', 'no data'),
                            str(check_category(
                                failed_check["check_id"])).lstrip(
                                    "CheckCategories."),
                            failed_check["check_id"],
                            failed_check["check_name"],
                            failed_check["check_result"]["result"],
                            failed_check["file_path"],
                            failed_check["check_class"],
                            failed_check["resource"].split(".")[0],
                            repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]
                        #check.extend(self.add_meta(scan_time))
                        result_lst.append(check)
                    # Placeholder row when the scan produced no results at all,
                    # so the chart still appears in the CSV output.
                    if results_scan.is_empty():
                        check = [
                            repoChartPathName, repo['repoName'],
                            chartPackage['name'], chartPackage['version'],
                            chartPackage['ts'],
                            chartPackage.get('signed', 'no data'),
                            chartPackage.get('security_report_created_at',
                                             'no data'), "empty scan",
                            chartPackage.get('is_operator', 'no data'),
                            "empty scan", "empty scan", "empty scan",
                            "empty scan", "empty scan", "empty scan",
                            "empty scan", repo['repoRaw']['repository_id'],
                            repo['repoRaw']['digest'],
                            repo['repoRaw']['last_tracking_ts'],
                            repo['repoRaw']['verified_publisher'],
                            repo['repoRaw']['official'],
                            repo['repoRaw']['scanner_disabled']
                        ]
                        #check.extend(self.add_meta(scan_time))
                        result_lst.append(check)
                        #empty_resources = self.module_resources()
                except Exception:
                    # Scan blew up: log the traceback and emit an
                    # "error in scan" row so the chart is still accounted for.
                    logging.info('unexpected error in scan')
                    exc_type, exc_value, exc_traceback = sys.exc_info()
                    tb = traceback.format_exception(exc_type, exc_value,
                                                    exc_traceback)
                    check = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'no data'),
                        chartPackage.get('security_report_created_at',
                                         'no data'), "error in scan",
                        chartPackage.get('is_operator', 'no data'),
                        "error in scan", "error in scan", "error in scan",
                        "error in scan", "error in scan", "error in scan",
                        "error in scan", repo['repoRaw']['repository_id'],
                        "error in scan", "error in scan",
                        repo['repoRaw']['verified_publisher'],
                        repo['repoRaw']['official'],
                        repo['repoRaw']['scanner_disabled']
                    ]
                    result_lst.append(check)
                # Summary Results
                try:
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Summaries"
                    )
                    res = results_scan.get_dict()
                    summary_lst_item = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'No Data'),
                        chartPackage.get('security_report_created_at',
                                         'No Data'), chartPackage['name'],
                        chartPackage.get('is_operator', 'No Data'), "success",
                        res["summary"]["passed"], res["summary"]["failed"],
                        res["summary"]["parsing_errors"]
                    ]
                except:  # scan failed above: emit a zeroed "failed" summary
                    summary_lst_item = [
                        repoChartPathName, repo['repoName'],
                        chartPackage['name'], chartPackage['version'],
                        chartPackage['ts'],
                        chartPackage.get('signed', 'No Data'),
                        chartPackage.get('security_report_created_at',
                                         'No Data'), chartPackage['name'],
                        chartPackage.get('is_operator', 'No Data'), "failed",
                        0, 0, 0
                    ]
                summary_lst.append(summary_lst_item)
                # Helm Dependancies
                try:
                    res = results_scan.get_dict()
                    logging.info(
                        f"SCAN OF {repo['repoName']}/{chartPackage['name']} | Processing Helm Dependancies"
                    )
                    #{'common': {'chart_name': 'common', 'chart_version': '0.0.5', 'chart_repo': 'https://charts.adfinis.com', 'chart_status': 'unpacked'}}
                    if chart_deps:
                        for key in chart_deps:
                            logging.info(f" HELMDEP FOUND! {chart_deps[key]}")
                            current_dep = chart_deps[key]
                            dep_item = [
                                repoChartPathName,  #Current chart combined repo/path
                                repo['repoName'],  #Current chart reponame
                                chartPackage['name'],  #Current chart chartname
                                chartPackage[
                                    'version'],  #Current chart version
                                list(current_dep.values())
                                [0],  #dep dict chart_name
                                list(current_dep.values())
                                [1],  #dep dict chart_version
                                list(current_dep.values())
                                [2],  #dep dict chart_repo
                                list(current_dep.values())[
                                    3]  #dep dict chart_status
                            ]
                            helmdeps_lst.append(dep_item)
                            logging.info(
                                f"CURRENT HELMDEPS LIST {helmdeps_lst}")
                except:  # NOTE(review): silently drops dependency failures
                    pass
        # Dep graph per repo
        if helmdeps_lst:
            depGraph.draw(f"results/helm-deps-{repo['repoName']}.png",
                          prog='circo')
        logging.info(f"Global deps usage: {globalDepsUsage}")
        logging.info(f"Global deps list {globalDepsList}")
        result_writer.print_csv(summary_lst, result_lst, helmdeps_lst,
                                empty_resources, RESULTS_PATH,
                                repo['repoName'], orgRepoFilename,
                                globalDepsList, globalDepsUsage)
        empty_resources_total.update(empty_resources)
    return checks_table, summary_table, all_resources, empty_resources_total, all_dataobj
def run(banner=checkov_banner, argv=sys.argv[1:]):
    """CLI entry point: parse args, build the runner registry, optionally set
    up the Bridgecrew platform integration, run the scan, and print reports.

    :param banner: banner text printed when invoked with no scan target.
    :param argv: argument vector to parse. NOTE: the default is captured once
        at import time from sys.argv[1:].
    """
    parser = argparse.ArgumentParser(
        description='Infrastructure as code static analysis')
    add_parser_args(parser)
    args = parser.parse_args(argv)
    # Disable runners with missing system dependencies
    args.skip_framework = runnerDependencyHandler.disable_incompatible_runners(
        args.skip_framework)
    runner_filter = RunnerFilter(
        framework=args.framework,
        skip_framework=args.skip_framework,
        checks=args.check,
        skip_checks=args.skip_check,
        download_external_modules=convert_str_to_bool(
            args.download_external_modules),
        external_modules_download_path=args.external_modules_download_path,
        evaluate_variables=convert_str_to_bool(args.evaluate_variables),
        runners=checkov_runners)
    # Reuse an externally injected registry (embedding/tests) when present,
    # otherwise build the default registry with one instance of each runner.
    if outer_registry:
        runner_registry = outer_registry
        runner_registry.runner_filter = runner_filter
    else:
        runner_registry = RunnerRegistry(banner, runner_filter, tf_runner(),
                                         cfn_runner(), k8_runner(),
                                         sls_runner(), arm_runner(),
                                         tf_plan_runner(), helm_runner())
    if args.version:
        print(version)
        return
    if args.bc_api_key:
        # --repo-id is mandatory alongside --bc-api-key and must be "org/repo".
        if args.repo_id is None:
            parser.error(
                "--repo-id argument is required when using --bc-api-key")
        if len(args.repo_id.split('/')) != 2:
            parser.error(
                "--repo-id argument format should be 'organization/repository_name' E.g "
                "bridgecrewio/checkov")
        source = os.getenv('BC_SOURCE', 'cli')
        source_version = os.getenv('BC_SOURCE_VERSION', version)
        logger.debug(f'BC_SOURCE = {source}, version = {source_version}')
        try:
            bc_integration.setup_bridgecrew_credentials(
                bc_api_key=args.bc_api_key,
                repo_id=args.repo_id,
                skip_fixes=args.skip_fixes,
                skip_suppressions=args.skip_suppressions,
                source=source,
                source_version=source_version)
        except Exception as e:
            # Platform setup failure aborts the run entirely rather than
            # falling back to a local-only scan.
            logger.error(
                'An error occurred setting up the Bridgecrew platform integration. Please check your API token and try again.',
                exc_info=True)
            return
    guidelines = {}
    if not args.no_guide:
        guidelines = bc_integration.get_guidelines()
    if args.check and args.skip_check:
        parser.error(
            "--check and --skip-check can not be applied together. please use only one of them"
        )
        return
    if args.list:
        print_checks(framework=args.framework)
        return
    external_checks_dir = get_external_checks_dir(args)
    url = None
    if args.directory:
        # Scan each requested directory, persisting results to the platform
        # when the integration is configured.
        for root_folder in args.directory:
            file = args.file
            scan_reports = runner_registry.run(
                root_folder=root_folder,
                external_checks_dir=external_checks_dir,
                files=file,
                guidelines=guidelines,
                bc_integration=bc_integration)
            if bc_integration.is_integration_configured():
                bc_integration.persist_repository(root_folder)
                bc_integration.persist_scan_results(scan_reports)
                url = bc_integration.commit_repository(args.branch)
            runner_registry.print_reports(scan_reports, args, url)
        # NOTE(review): original indentation was lost in this source; the
        # return is assumed to sit after the loop so every directory is
        # scanned — confirm against upstream history.
        return
    elif args.file:
        scan_reports = runner_registry.run(
            external_checks_dir=external_checks_dir,
            files=args.file,
            guidelines=guidelines,
            bc_integration=bc_integration)
        if bc_integration.is_integration_configured():
            # Use the deepest common directory of the files as the repo root.
            files = [os.path.abspath(file) for file in args.file]
            root_folder = os.path.split(os.path.commonprefix(files))[0]
            bc_integration.persist_repository(root_folder)
            bc_integration.persist_scan_results(scan_reports)
            url = bc_integration.commit_repository(args.branch)
        runner_registry.print_reports(scan_reports, args, url)
    else:
        # No scan target given: show the banner and start onboarding.
        print(f"{banner}")
        bc_integration.onboarding()
def run(banner=checkov_banner, argv=sys.argv[1:]):
    """CLI entry point (docker-image-aware variant): parse args, configure
    the Bridgecrew integration, run the selected runners, and return an exit
    code (1 when any directory scan reported failures).

    :param banner: banner text printed when invoked with no scan target.
    :param argv: argument vector to parse. NOTE: the default is captured once
        at import time from sys.argv[1:].
    """
    parser = argparse.ArgumentParser(
        description='Infrastructure as code static analysis')
    add_parser_args(parser)
    args = parser.parse_args(argv)
    # bridgecrew uses both the urllib3 and requests libraries, while checkov uses the requests library.
    # Allow the user to specify a CA bundle to be used by both libraries.
    bc_integration.setup_http_manager(args.ca_certificate)
    # Disable runners with missing system dependencies
    args.skip_framework = runnerDependencyHandler.disable_incompatible_runners(
        args.skip_framework)
    runner_filter = RunnerFilter(
        framework=args.framework,
        skip_framework=args.skip_framework,
        checks=args.check,
        skip_checks=args.skip_check,
        download_external_modules=convert_str_to_bool(
            args.download_external_modules),
        external_modules_download_path=args.external_modules_download_path,
        evaluate_variables=convert_str_to_bool(args.evaluate_variables),
        runners=checkov_runners)
    # Reuse an externally injected registry (embedding/tests) when present.
    if outer_registry:
        runner_registry = outer_registry
        runner_registry.runner_filter = runner_filter
    else:
        runner_registry = RunnerRegistry(banner, runner_filter,
                                         tf_graph_runner(), cfn_runner(),
                                         k8_runner(), sls_runner(),
                                         arm_runner(), tf_plan_runner(),
                                         helm_runner(), dockerfile_runner())
    if args.version:
        print(version)
        return
    # Distinguish "flag given but blank" (likely a bad CI secret mapping)
    # from "flag not given at all".
    if args.bc_api_key == '':
        parser.error('The --bc-api-key flag was specified but the value was blank. If this value was passed as a secret, you may need to double check the mapping.')
    elif args.bc_api_key:
        logger.debug(f'Using API key ending with {args.bc_api_key[-8:]}')
        if args.repo_id is None:
            parser.error("--repo-id argument is required when using --bc-api-key")
        if len(args.repo_id.split('/')) != 2:
            parser.error("--repo-id argument format should be 'organization/repository_name' E.g "
                         "bridgecrewio/checkov")
        source = os.getenv('BC_SOURCE', 'cli')
        source_version = os.getenv('BC_SOURCE_VERSION', version)
        logger.debug(f'BC_SOURCE = {source}, version = {source_version}')
        try:
            bc_integration.setup_bridgecrew_credentials(
                bc_api_key=args.bc_api_key,
                repo_id=args.repo_id,
                skip_fixes=args.skip_fixes,
                skip_suppressions=args.skip_suppressions,
                source=source,
                source_version=source_version,
                repo_branch=args.branch)
        except Exception as e:
            # Platform setup failure aborts the run entirely.
            logger.error('An error occurred setting up the Bridgecrew platform integration. Please check your API token and try again.', exc_info=True)
            return
    else:
        logger.debug('No API key found. Scanning locally only.')
    guidelines = {}
    if not args.no_guide:
        guidelines = bc_integration.get_guidelines()
    if args.check and args.skip_check:
        parser.error("--check and --skip-check can not be applied together. please use only one of them")
        return
    if args.list:
        print_checks(framework=args.framework)
        return
    external_checks_dir = get_external_checks_dir(args)
    url = None
    if args.directory:
        # Aggregate per-directory exit codes so a failure anywhere fails the
        # whole run.
        exit_codes = []
        for root_folder in args.directory:
            file = args.file
            scan_reports = runner_registry.run(
                root_folder=root_folder,
                external_checks_dir=external_checks_dir,
                files=file,
                guidelines=guidelines,
                bc_integration=bc_integration)
            if bc_integration.is_integration_configured():
                bc_integration.persist_repository(root_folder)
                bc_integration.persist_scan_results(scan_reports)
                url = bc_integration.commit_repository(args.branch)
            exit_codes.append(
                runner_registry.print_reports(scan_reports, args, url))
        exit_code = 1 if 1 in exit_codes else 0
        return exit_code
    elif args.file:
        scan_reports = runner_registry.run(
            external_checks_dir=external_checks_dir,
            files=args.file,
            guidelines=guidelines,
            bc_integration=bc_integration)
        if bc_integration.is_integration_configured():
            # Use the deepest common directory of the files as the repo root.
            files = [os.path.abspath(file) for file in args.file]
            root_folder = os.path.split(os.path.commonprefix(files))[0]
            bc_integration.persist_repository(root_folder)
            bc_integration.persist_scan_results(scan_reports)
            url = bc_integration.commit_repository(args.branch)
        return runner_registry.print_reports(scan_reports, args, url)
    elif args.docker_image:
        # Image scanning requires the platform integration plus repo context.
        if args.bc_api_key is None:
            parser.error("--bc-api-key argument is required when using --docker-image")
            return
        if args.dockerfile_path is None:
            parser.error("--dockerfile-path argument is required when using --docker-image")
            return
        if args.branch is None:
            parser.error("--branch argument is required when using --docker-image")
            return
        image_scanner.scan(args.docker_image, args.dockerfile_path)
    else:
        # No scan target given: show the banner and start onboarding.
        print(f"{banner}")
        bc_integration.onboarding()