def read_algorithm_config(config_file):
    """Read config file and return a tuple of values"""
    LOG.info("Reading config file ...")
    config_data = ()
    with open(config_file, "r") as f:
        data = f.read()
    out_re = data.replace("\r", "").replace(" ", "")
    out_ind = out_re.split('\n')
    config_data = (out_ind[0].split(':')[1],
                   out_ind[1].split(':')[1],
                   out_ind[2].split(':')[1],
                   out_ind[3].split(':')[1])
    # config_data = (Population_Range, Termination, Adaptive_Mutation_Step,
    #                Survivor_Selection)
    LOG.info("{0}".format(config_data))
    return config_data
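# A hedged sketch of the key:value layout read_algorithm_config() assumes;
# the field names follow the comment inside the function, the concrete
# values are placeholders.
SAMPLE_GA_CONF = (
    "Population_Range:100\n"
    "Termination:50\n"
    "Adaptive_Mutation_Step:True\n"
    "Survivor_Selection:(10,70)\n"
)
# Parsing a file with this content would return the string tuple
# ('100', '50', 'True', '(10,70)'); every field comes back as a string,
# which is why callers later apply int() and ast.literal_eval() to them.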
def nodejs_build(src, reports_dir, lang_tools):
    """
    Automatically build nodejs project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed successfully. False otherwise
    """
    cmd_args = lang_tools.get("npm")
    yarn_mode = False
    rush_mode = False
    rushjson_files = [p.as_posix() for p in Path(src).glob("rush.json")]
    pjson_files = [p.as_posix() for p in Path(src).glob("package.json")]
    ylock_files = [p.as_posix() for p in Path(src).glob("yarn.lock")]
    if ylock_files:
        cmd_args = lang_tools.get("yarn")
        yarn_mode = True
    elif rushjson_files:
        cmd_args = lang_tools.get("rush")
        rush_mode = True
    elif not pjson_files:
        LOG.debug(
            "Nodejs auto build is supported only for npm or yarn or rush based projects"
        )
        return False
    cp = exec_tool("auto-build", cmd_args, src)
    if cp:
        ret = cp.returncode == 0
    else:
        ret = False
    try:
        cmd_args = ["npm"]
        if yarn_mode:
            cmd_args = ["yarn"]
        if rush_mode:
            cmd_args = ["rush", "rebuild"]
        else:
            cmd_args += ["run", "build"]
        exec_tool("auto-build", cmd_args, src)
    except Exception:
        if rush_mode:
            LOG.warning(
                "Automatic build for rush.js has failed. Try installing the packages manually before invoking scan.\nIf this works then let us know the build steps by filing an issue."
            )
        else:
            LOG.debug("Automatic build has failed for the node.js project")
    return ret
def php_build(src, reports_dir, lang_tools):
    """
    Automatically build php project

    :param src: Source directory
    :param reports_dir: Reports directory to store any logs
    :param lang_tools: Language specific build tools

    :return: boolean status from the build. True if the command executed successfully. False otherwise
    """
    ret = False
    cmd_args = lang_tools.get("install")
    cjson_files = [p.as_posix() for p in Path(src).glob("composer.json")]
    # If there is no composer.json try to create one
    if not cjson_files:
        cp = exec_tool(
            "auto-build",
            lang_tools.get("init"),
            src,
            env=os.environ.copy(),
            stdout=subprocess.PIPE,
        )
        if cp:
            LOG.debug(cp.stdout)
    cp = exec_tool(
        "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
    )
    if cp:
        LOG.debug(cp.stdout)
        ret = cp.returncode == 0
    # If composer install fails, try composer update
    if not ret:
        cmd_args = lang_tools.get("update")
        cp = exec_tool(
            "auto-build", cmd_args, src, env=os.environ.copy(), stdout=subprocess.PIPE
        )
        if cp:
            LOG.debug(cp.stdout)
            ret = cp.returncode == 0
    return ret
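# Illustrative lang_tools mapping consumed by nodejs_build() and php_build();
# only the keys looked up above ("npm", "yarn", "rush", "init", "install",
# "update") come from the code, the command lists themselves are assumptions.
SAMPLE_LANG_TOOLS = {
    "nodejs": {
        "npm": ["npm", "install"],
        "yarn": ["yarn", "install"],
        "rush": ["rush", "install"],
    },
    "php": {
        "init": ["composer", "init"],
        "install": ["composer", "install"],
        "update": ["composer", "update"],
    },
}
# nodejs_build(src, reports_dir, SAMPLE_LANG_TOOLS["nodejs"]) would pick the
# yarn command when a yarn.lock is present, the rush command for rush.json,
# and fall back to npm when only package.json exists.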
def exec_tool(  # scan:ignore
    tool_name, args, cwd=None, env=utils.get_env(), stdout=subprocess.DEVNULL
):
    """
    Convenience method to invoke cli tools

    Args:
      tool_name Tool name
      args cli command and args
      cwd Current working directory
      env Environment variables
      stdout stdout configuration for run command

    Returns:
      CompletedProcess instance
    """
    with Progress(
        console=console,
        redirect_stderr=False,
        redirect_stdout=False,
        refresh_per_second=1,
    ) as progress:
        task = None
        try:
            env = use_java(env)
            LOG.debug('⚡︎ Executing {} "{}"'.format(tool_name, " ".join(args)))
            stderr = subprocess.DEVNULL
            if LOG.isEnabledFor(DEBUG):
                stderr = subprocess.STDOUT
            tool_verb = "Scanning with"
            if "init" in tool_name:
                tool_verb = "Initializing"
            elif "build" in tool_name:
                tool_verb = "Building with"
            task = progress.add_task(
                "[green]" + tool_verb + " " + tool_name, total=100, start=False
            )
            cp = subprocess.run(
                args,
                stdout=stdout,
                stderr=stderr,
                cwd=cwd,
                env=env,
                check=False,
                shell=False,
                encoding="utf-8",
            )
            if cp and stdout == subprocess.PIPE:
                for line in cp.stdout:
                    progress.update(task, completed=5)
            if (
                cp
                and LOG.isEnabledFor(DEBUG)
                and cp.returncode
                and cp.stdout is not None
            ):
                LOG.debug(cp.stdout)
            progress.update(task, completed=100, total=100)
            return cp
        except Exception as e:
            if task:
                progress.update(task, completed=20, total=10, visible=False)
            if not LOG.isEnabledFor(DEBUG):
                LOG.info(
                    f"{tool_name} has reported few errors. To view, pass the environment variable SCAN_DEBUG_MODE=debug"
                )
            LOG.debug(e)
            return None
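def _example_exec_tool(project_dir):
    """Hedged usage sketch for exec_tool(); the "example-lint" command and its
    arguments are hypothetical stand-ins for a real scanner binary."""
    cp = exec_tool(
        "example-lint",                               # label shown on the progress bar
        ["example-lint", "--format", "json", "."],    # command passed to subprocess.run
        cwd=project_dir,
        stdout=subprocess.PIPE,                       # capture output instead of discarding it
    )
    # cp is a CompletedProcess (or None when the invocation raised); stdout is
    # decoded text because exec_tool passes encoding="utf-8".
    return cp.stdout if cp and cp.returncode == 0 else None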
def execute_default_cmd( # scan:ignore cmd_map_list, type_str, tool_name, src, reports_dir, convert, scan_mode, repo_context, ): """ Method to execute default command for the given type Args: cmd_map_list Default commands in the form of a dict (multiple) or list type_str Project type tool_name Tool name src Project dir reports_dir Directory for output reports convert Boolean to enable normalisation of reports json scan_mode Scan mode string repo_context Repo context """ # Check if there is a default command specified for the given type # Create the reports dir report_fname_prefix = os.path.join(reports_dir, tool_name + "-report") # Look for any additional direct arguments for the tool and inject them if config.get(tool_name + "_direct_args"): direct_args = config.get(tool_name + "_direct_args").split(" ") if direct_args: cmd_map_list += direct_args src_or_file = src if config.get("SHIFTLEFT_ANALYZE_FILE"): src_or_file = config.get("SHIFTLEFT_ANALYZE_FILE") default_cmd = " ".join(cmd_map_list) % dict( src=src, src_or_file=src_or_file, reports_dir=reports_dir, report_fname_prefix=report_fname_prefix, type=type_str, scan_mode=scan_mode, ) # Try to detect if the output could be json outext = ".out" if "json" in default_cmd: outext = ".json" elif "csv" in default_cmd: outext = ".csv" elif "sarif" in default_cmd: outext = ".sarif" elif "xml" in default_cmd: outext = ".xml" report_fname = report_fname_prefix + outext # If the command doesn't support file output then redirect stdout automatically stdout = None if LOG.isEnabledFor(DEBUG): stdout = None if reports_dir and report_fname_prefix not in default_cmd: report_fname = report_fname_prefix + outext stdout = io.open(report_fname, "w") LOG.debug("Output will be written to {}".format(report_fname)) # If the command is requesting list of files then construct the argument filelist_prefix = "(filelist=" if default_cmd.find(filelist_prefix) > -1: si = default_cmd.find(filelist_prefix) ei = default_cmd.find(")", si + 10) ext = default_cmd[si + 10:ei] filelist = utils.find_files(src, ext) # Temporary fix for the yaml issue if ext == "yaml": yml_list = utils.find_files(src, "yml") if yml_list: filelist.extend(yml_list) delim = " " default_cmd = default_cmd.replace(filelist_prefix + ext + ")", delim.join(filelist)) cmd_with_args = default_cmd.split(" ") # Suppress psalm output if should_suppress_output(type_str, cmd_with_args[0]): stdout = subprocess.DEVNULL exec_tool(tool_name, cmd_with_args, cwd=src, stdout=stdout) # Should we attempt to convert the report to sarif format if should_convert(convert, tool_name, cmd_with_args[0], report_fname): crep_fname = utils.get_report_file(tool_name, reports_dir, convert, ext_name="sarif") if (cmd_with_args[0] == "java" or "pmd-bin" in cmd_with_args[0] or "php" in tool_name): convertLib.convert_file( tool_name, cmd_with_args, src, report_fname, crep_fname, ) else: convertLib.convert_file( cmd_with_args[0], cmd_with_args[1:], src, report_fname, crep_fname, ) try: if not LOG.isEnabledFor(DEBUG): os.remove(report_fname) except Exception: LOG.debug("Unable to remove file {}".format(report_fname)) elif type_str == "depscan": # Convert depscan and license scan files to html depscan_files = utils.find_files(reports_dir, "depscan", True) for df in depscan_files: if not df.endswith(".html"): depscan_data = grafeas.parse(df) if depscan_data and len(depscan_data): html_fname = df.replace(".json", ".html") grafeas.render_html(depscan_data, html_fname) track({ "id": config.get("run_uuid"), "depscan_summary": depscan_data }) 
LOG.debug( "Depscan and HTML report written to file: %s, %s :thumbsup:", df, html_fname, ) licence_files = utils.find_files(reports_dir, "license", True) for lf in licence_files: if not lf.endswith(".html"): licence_data = licence.parse(lf) if licence_data and len(licence_data): html_fname = lf.replace(".json", ".html") licence.render_html(licence_data, html_fname) track({ "id": config.get("run_uuid"), "license_summary": licence_data }) LOG.debug( "License check and HTML report written to file: %s, %s :thumbsup:", lf, html_fname, )
def find_repo_details(src_dir=None): """Method to find repo details such as url, sha etc This will be populated into versionControlProvenance attribute :param src_dir: Source directory """ # See if repository uri is specified in the config repositoryName = None repositoryUri = "" revisionId = "" branch = "" invokedBy = "" pullRequest = False gitProvider = "" ciProvider = "" """ Since CI servers typically checkout repo in detached mode, we need to rely on environment variables as a starting point to find the repo details. To make matters worse, since we run the tools inside a container these variables should be passed as part of the docker run command. With native integrations such as GitHub action and cloudbuild this could be taken care by our builders. Env variables detection for popular CI server is implemented here anyways. But they are effective only in few cases. Azure pipelines - https://docs.microsoft.com/en-us/azure/devops/pipelines/build/variables?view=azure-devops&tabs=yaml BitBucket - https://confluence.atlassian.com/bitbucket/environment-variables-in-bitbucket-pipelines-794502608.html GitHub actions - https://help.github.com/en/actions/automating-your-workflow-with-github-actions/using-environment-variables Google CloudBuild - https://cloud.google.com/cloud-build/docs/configuring-builds/substitute-variable-values CircleCI - https://circleci.com/docs/2.0/env-vars/#built-in-environment-variables Travis - https://docs.travis-ci.com/user/environment-variables/#default-environment-variables AWS CodeBuild - https://docs.aws.amazon.com/codebuild/latest/userguide/build-env-ref-env-vars.html GitLab - https://docs.gitlab.com/ee/ci/variables/predefined_variables.html Jenkins - https://jenkins.io/doc/book/pipeline/jenkinsfile/#using-environment-variables """ for key, value in os.environ.items(): # Check REPOSITORY_URL first followed CI specific vars # Some CI such as GitHub pass only the slug instead of the full url :( if not gitProvider or not ciProvider: if key.startswith("GITHUB_"): if key == "GITHUB_REPOSITORY": gitProvider = "github" if key == "GITHUB_ACTION": ciProvider = "github" elif key.startswith("GITLAB_"): gitProvider = "gitlab" if key == "GITLAB_CI": ciProvider = "gitlab" elif key.startswith("BITBUCKET_"): gitProvider = "bitbucket" if key == "BITBUCKET_BUILD_NUMBER": ciProvider = "bitbucket" elif key.startswith("CIRCLE_"): ciProvider = "circle" elif key.startswith("TRAVIS_"): ciProvider = "travis" elif key.startswith("CODEBUILD_"): ciProvider = "codebuild" elif key.startswith("BUILD_REQUESTEDFOREMAIL"): ciProvider = "azure" elif key.startswith("JENKINS_"): ciProvider = "jenkins" if not repositoryName: if key in [ "BUILD_REPOSITORY_NAME", "GITHUB_REPOSITORY", "BITBUCKET_REPO_SLUG", "REPO_NAME", "CIRCLE_PROJECT_REPONAME", "TRAVIS_REPO_SLUG", "CI_PROJECT_NAME", ]: if "/" in value: repositoryName = value.split("/")[-1] else: repositoryName = value if not repositoryUri: if key in [ "REPOSITORY_URL", "BUILD_REPOSITORY_URI", "GITHUB_REPOSITORY", "BITBUCKET_GIT_HTTP_ORIGIN", "REPO_NAME", "CIRCLE_REPOSITORY_URL", "TRAVIS_REPO_SLUG", "CODEBUILD_SOURCE_REPO_URL", "CI_REPOSITORY_URL", ]: repositoryUri = value if key in [ "COMMIT_SHA", "BUILD_SOURCEVERSION", "BITBUCKET_COMMIT", "GITHUB_SHA", "CIRCLE_SHA1", "TRAVIS_COMMIT", "CODEBUILD_SOURCE_VERSION", "CI_COMMIT_SHA", ]: revisionId = value if key in [ "BRANCH", "BUILD_SOURCEBRANCH", "BITBUCKET_BRANCH", "GITHUB_REF", "BRANCH_NAME", "CIRCLE_BRANCH", "TRAVIS_BRANCH", "CI_COMMIT_REF_NAME", ]: branch = value if key in [ 
"BUILD_REQUESTEDFOREMAIL", "GITHUB_ACTOR", "PROJECT_ID", "CIRCLE_USERNAME", "GITLAB_USER_EMAIL", ]: invokedBy = value if key.startswith("CI_MERGE_REQUEST"): pullRequest = True if src_dir and os.path.isdir(os.path.join(src_dir, ".git")): # Try interacting with git try: repo = Repo(src_dir) head = repo.head if not branch and not head.is_detached: branch = repo.active_branch.name if not revisionId and head: revisionId = head.commit.hexsha if not repositoryUri: repositoryUri = next(iter(repo.remote().urls)) if not invokedBy or "@" not in invokedBy: if head and head.commit.author and head.commit.author.email: invokedBy = "{} <{}>".format( head.commit.author.name, head.commit.author.email ) except Exception: LOG.debug("Unable to find repo details from the local repository") if branch.startswith("refs/pull"): pullRequest = True branch = branch.replace("refs/pull/", "") # Cleanup the variables branch = branch.replace("refs/heads/", "") if repositoryUri: repositoryUri = repositoryUri.replace( "[email protected]:", "https://github.com/" ).replace(".git", "") # Is it a repo slug? repo_slug = True repositoryUri = sanitize_url(repositoryUri) for pref in repo_url_prefixes: if repositoryUri.startswith(pref): repo_slug = False break if not repo_slug: if "vs-ssh" in repositoryUri: repo_slug = False # For repo slug just assume github for now if repo_slug: repositoryUri = "https://github.com/" + repositoryUri if not repositoryName and repositoryUri: repositoryName = os.path.basename(repositoryUri) if not gitProvider: if "github" in repositoryUri: gitProvider = "github" if "gitlab" in repositoryUri: gitProvider = "gitlab" if "atlassian" in repositoryUri or "bitbucket" in repositoryUri: gitProvider = "bitbucket" if "azure" in repositoryUri or "visualstudio" in repositoryUri: gitProvider = "azure" if not ciProvider: ciProvider = "azure" if not gitProvider and "tfs" in repositoryUri: gitProvider = "tfs" ciProvider = "tfs" return { "gitProvider": gitProvider, "ciProvider": ciProvider, "repositoryName": "" if not repositoryName else repositoryName, "repositoryUri": repositoryUri, "revisionId": revisionId, "branch": branch, "invokedBy": invokedBy, "pullRequest": pullRequest, "botUser": is_bot(invokedBy), }
def convert_sarif(app_name, repo_context, sarif_files, findings_fname):
    """
    Method to convert sarif to findings json

    :param app_name: Application name
    :param sarif_file:
    :param findings_fname:
    :return:
    """
    finding_id = 1
    with open(findings_fname, mode="w") as out_file:
        for sf in sarif_files:
            with open(sf, mode="r") as report_file:
                report_data = json.loads(report_file.read())
                # skip this file if the data is empty
                if not report_data or not report_data.get("runs"):
                    continue
                # Iterate through all the runs
                for run in report_data["runs"]:
                    try:
                        rules = {r["id"]: r for r in run["tool"]["driver"]["rules"]}
                        results = run["results"]
                        for result in results:
                            rule = rules.get(result["ruleId"])
                            for location in result["locations"]:
                                finding = {
                                    "app": app_name,
                                    "type": "vuln",
                                    "title": result["message"]["text"],
                                    "description": rule["fullDescription"]["text"],
                                    "internal_id": "{}/{}".format(
                                        result["ruleId"],
                                        utils.calculate_line_hash(
                                            location["physicalLocation"]["region"][
                                                "snippet"
                                            ]["text"]
                                        ),
                                    ),
                                    "severity": convert_severity(
                                        result["properties"]["issue_severity"]
                                    ),
                                    "owasp_category": "",
                                    "category": result["ruleId"],
                                    "details": {
                                        "repoContext": repo_context,
                                        "name": result["message"]["text"],
                                        "tags": ",".join(rule["properties"]["tags"]),
                                        "fileName": location["physicalLocation"][
                                            "artifactLocation"
                                        ]["uri"],
                                        "DATA_TYPE": "OSS_SCAN",
                                        "lineNumber": location["physicalLocation"][
                                            "region"
                                        ]["startLine"],
                                    },
                                }
                                out_file.write(json.dumps(finding))
                                finding_id = finding_id + 1
                    except Exception as e:
                        LOG.warning("Unable to convert the run to findings format")
def convert_sarif(app_name, repo_context, sarif_files, findings_fname): """ Method to convert sarif to findings json :param app_name: Application name :param sarif_file: :param findings_fname: :return: """ finding_id = 1 findings_list = [] rule_id_owasp_cache = {} for sf in sarif_files: with open(sf, mode="r") as report_file: report_data = None try: report_data = json.loads(report_file.read()) # skip this file if the data is empty if not report_data or not report_data.get("runs"): continue # Iterate through all the runs for run in report_data["runs"]: results = run.get("results") if not results: continue tool_name = run.get("tool", {}).get("driver", {}).get("name") rules = { r["id"]: r for r in run.get("tool", {}).get("driver", {}).get("rules") if r and r.get("id") } for result in results: rule_id = result.get("ruleId", "") rule = rules.get(rule_id) if not rule: continue owasp_category = rule_id_owasp_cache.get(rule_id, "") if not owasp_category: # Check the config for any available owasp category mapping for rok, rov in config.get("rules_owasp_category").items(): if ( rok.upper() == rule_id.upper() or rok.upper() in rule_id.upper() ): rule_id_owasp_cache[rule_id] = rov owasp_category = rov category = rule.get("name") if not category: category = rule_id desc = get_help( rule_id, rule_obj=rule, tool_name=tool_name, owasp_category=owasp_category, ) short_desc = rule.get("shortDescription", {}).get("text") if not short_desc: short_desc = result.get("message", {}).get("text") ngsev = convert_severity( result.get("properties", {})["issue_severity"] ) # Populate tags tags = [] if "CWE" in rule_id: tags.append( { "key": "cwe_category", "value": rule_id.replace("CWE-", ""), "shiftleft_managed": True, } ) if "CKV_" in rule_id or "CIS_" in rule_id or "AWS" in rule_id: cis_rule = cis.get_rule(rule_id) if cis_rule: tags.append( { "key": "cis_category", "value": cis_rule.get("id", ""), "shiftleft_managed": False, } ) if cis_rule.get("scored"): tags.append( { "key": "cis_status", "value": "SCORED", "shiftleft_managed": False, } ) for location in result.get("locations"): filename = location["physicalLocation"]["artifactLocation"][ "uri" ] lineno = location.get("physicalLocation", {})["region"][ "startLine" ] end_lineno = location.get("physicalLocation", {})[ "contextRegion" ]["endLine"] finding = { "app": app_name, "type": "extscan", "title": result.get("message", {})["text"], "description": desc, "internal_id": "{}/{}".format( rule_id, utils.calculate_line_hash( filename, lineno, end_lineno, location.get("physicalLocation", {})["region"][ "snippet" ]["text"], short_desc, ), ), "severity": ngsev, "owasp_category": owasp_category, "category": category, "details": { "repoContext": repo_context, "name": result.get("message", {})["text"], "tags": ",".join(rule["properties"]["tags"]), "fileName": filename, "DATA_TYPE": "OSS_SCAN", "lineNumber": lineno, "ruleId": rule_id, "ruleName": rule.get("name"), "contextText": location.get("physicalLocation", {})[ "region" ]["snippet"]["text"], "snippetText": location.get("physicalLocation", {})[ "contextRegion" ]["snippet"]["text"], }, "tags": tags, } findings_list.append(finding) finding_id = finding_id + 1 except Exception as e: LOG.debug(e) continue with open(findings_fname, mode="w") as out_file: json.dump({"findings": findings_list}, out_file)
def report( tool_name, tool_args, working_dir, metrics, skips, issues, crep_fname, file_path_list=None, ): """Prints issues in SARIF format :param tool_name: tool name :param tool_args: Args used for the tool :param working_dir: Working directory :param metrics: metrics data :param skips: skips data :param issues: issues data :param crep_fname: The output file name :param file_path_list: Full file path for any manipulation :return serialized_log: SARIF output data """ if not tool_args: tool_args = [] tool_args_str = tool_args if isinstance(tool_args, list): tool_args_str = " ".join(tool_args) repo_details = find_repo_details(working_dir) log_uuid = str(uuid.uuid4()) run_uuid = config.get("run_uuid") # working directory to use in the log WORKSPACE_PREFIX = config.get("WORKSPACE", None) wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir driver_name = config.tool_purpose_message.get(tool_name, tool_name) # Construct SARIF log log = om.SarifLog( schema_uri="https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", version="2.1.0", inline_external_properties=[ om.ExternalProperties(guid=log_uuid, run_guid=run_uuid) ], runs=[ om.Run( automation_details=om.RunAutomationDetails( guid=log_uuid, description=om.Message( text="Static Analysis Security Test results using @ShiftLeft/sast-scan" ), ), tool=om.Tool( driver=om.ToolComponent( name=driver_name, full_name=driver_name, version="1.0.0-scan" ) ), invocations=[ om.Invocation( end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT), execution_successful=True, working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)), ) ], conversion={ "tool": om.Tool( driver=om.ToolComponent(name="@ShiftLeft/sast-scan") ), "invocation": om.Invocation( execution_successful=True, command_line=tool_args_str, arguments=tool_args, working_directory=om.ArtifactLocation(uri=to_uri(wd_dir_log)), end_time_utc=datetime.datetime.utcnow().strftime(TS_FORMAT), ), }, version_control_provenance=[ om.VersionControlDetails( repository_uri=repo_details["repositoryUri"], branch=repo_details["branch"], revision_id=repo_details["revisionId"], ) ], ) ], ) run = log.runs[0] invocation = run.invocations[0] add_skipped_file_notifications(skips, invocation) add_results(tool_name, issues, run, file_path_list, working_dir) serialized_log = to_json(log) if crep_fname: html_file = crep_fname.replace(".sarif", ".html") with io.open(crep_fname, "w") as fileobj: fileobj.write(serialized_log) if tool_name != "empty-scan": render_html(json.loads(serialized_log), html_file) if fileobj.name != sys.stdout.name: LOG.debug( "SARIF and HTML report written to file: %s, %s :thumbsup:", fileobj.name, html_file, ) return serialized_log
def report( tool_name, tool_args, working_dir, metrics, skips, issues, crep_fname, file_path_list=None, ): """Prints issues in SARIF format :param tool_name: tool name :param tool_args: Args used for the tool :param working_dir: Working directory :param metrics: metrics data :param skips: skips data :param issues: issues data :param crep_fname: The output file name :param file_path_list: Full file path for any manipulation :return serialized_log: SARIF output data """ if not tool_args: tool_args = [] tool_args_str = tool_args if isinstance(tool_args, list): tool_args_str = " ".join(tool_args) repo_details = find_repo_details(working_dir) log_uuid = str(uuid.uuid4()) run_uuid = config.get("run_uuid") # Populate metrics metrics = { "total": 0, "critical": 0, "high": 0, "medium": 0, "low": 0, } total = 0 for issue in issues: issue_dict = issue_from_dict(issue).as_dict() rule_id = issue_dict.get("test_id") # Is this rule ignored globally? if rule_id in config.ignored_rules: continue total += 1 issue_severity = issue_dict["issue_severity"] # Fix up severity for certain tools issue_severity = tweak_severity(tool_name, issue_dict) key = issue_severity.lower() if not metrics.get(key): metrics[key] = 0 metrics[key] += 1 metrics["total"] = total # working directory to use in the log WORKSPACE_PREFIX = config.get("WORKSPACE", None) wd_dir_log = WORKSPACE_PREFIX if WORKSPACE_PREFIX is not None else working_dir driver_name = config.tool_purpose_message.get(tool_name, tool_name) if tool_name != "inspect" and config.get("CI") or config.get( "GITHUB_ACTIONS"): driver_name = "ShiftLeft " + driver_name # Construct SARIF log log = om.SarifLog( schema_uri= "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json", version="2.1.0", inline_external_properties=[ om.ExternalProperties(guid=log_uuid, run_guid=run_uuid) ], runs=[ om.Run( automation_details=om.RunAutomationDetails( guid=log_uuid, description=om.Message( text= "Static Analysis Security Test results using @ShiftLeft/sast-scan" ), ), tool=om.Tool(driver=om.ToolComponent(name=driver_name)), invocations=[ om.Invocation( end_time_utc=datetime.datetime.utcnow().strftime( TS_FORMAT), execution_successful=True, working_directory=om.ArtifactLocation( uri=to_uri(wd_dir_log)), ) ], conversion={ "tool": om.Tool(driver=om.ToolComponent( name="@ShiftLeft/sast-scan")), "invocation": om.Invocation( execution_successful=True, command_line=tool_args_str, arguments=tool_args, working_directory=om.ArtifactLocation( uri=to_uri(wd_dir_log)), end_time_utc=datetime.datetime.utcnow().strftime( TS_FORMAT), ), }, properties={"metrics": metrics}, version_control_provenance=[ om.VersionControlDetails( repository_uri=repo_details["repositoryUri"], branch=repo_details["branch"], revision_id=repo_details["revisionId"], ) ], ) ], ) run = log.runs[0] invocation = run.invocations[0] add_skipped_file_notifications(skips, invocation) add_results(tool_name, issues, run, file_path_list, working_dir) serialized_log = to_json(log) if crep_fname: html_file = crep_fname.replace(".sarif", ".html") with io.open(crep_fname, "w") as fileobj: fileobj.write(serialized_log) render_html(json.loads(serialized_log), html_file) if fileobj.name != sys.stdout.name: LOG.debug( "SARIF and HTML report written to file: %s, %s 👍", fileobj.name, html_file, ) return serialized_log
def annotate_pr(self, repo_context, findings_file, report_summary, build_status): if not findings_file: return with open(findings_file, mode="r") as fp: try: gitlab_context = self.get_context(repo_context) findings_obj = json.load(fp) findings = findings_obj.get("findings") if not findings: LOG.debug("No findings from scan available to report") return if not gitlab_context.get( "mergeRequestIID") or not gitlab_context.get( "mergeRequestProjectId"): LOG.debug( "Scan is not running as part of a merge request. Check if the pipeline is using only: [merge_requests] or rules syntax" ) return private_token = self.get_token() if not private_token: LOG.info( "To create a merge request note, create a personal access token with api scope and set it as GITLAB_TOKEN environment variable" ) return summary = "| Tool | Critical | High | Medium | Low | Status |\n" summary = ( summary + "| ---- | ------- | ------ | ----- | ---- | ---- |\n") for rk, rv in report_summary.items(): summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {rv.get("status")} |\n' template = config.get("PR_COMMENT_TEMPLATE") recommendation = ( f"Please review the [scan reports]({gitlab_context.get('jobUrl')}/artifacts/browse/reports) before approving this merge request." if build_status == "fail" else "Looks good") apiUrl = (f"{gitlab_context.get('apiUrl')}") mergeRequestIID = (f"{gitlab_context.get('mergeRequestIID')}") mergeRequestProjectId = ( f"{gitlab_context.get('mergeRequestProjectId')}") mergeRequestSourceBranch = ( f"{gitlab_context.get('mergeRequestSourceBranch')}") mergeRequestTargetBranch = ( f"{gitlab_context.get('mergeRequestTargetBranch')}") commitSHA = (f"{gitlab_context.get('commitSHA')}") projectId = (f"{gitlab_context.get('projectId')}") projectName = (f"{gitlab_context.get('projectName')}") projectUrl = (f"{gitlab_context.get('projectUrl')}") jobUrl = (f"{gitlab_context.get('jobUrl')}") jobId = (f"{gitlab_context.get('jobId')}") jobName = (f"{gitlab_context.get('jobName')}") jobToken = (f"{gitlab_context.get('jobToken')}") body = template % dict( summary=summary, recommendation=recommendation, apiUrl=apiUrl, mergeRequestIID=mergeRequestIID, mergeRequestProjectId=mergeRequestProjectId, mergeRequestSourceBranch=mergeRequestSourceBranch, mergeRequestTargetBranch=mergeRequestTargetBranch, commitSHA=commitSHA, projectId=projectId, projectName=projectName, projectUrl=projectUrl, jobUrl=jobUrl, jobId=jobId, jobName=jobName, jobToken=jobToken) rr = requests.post( self.get_mr_notes_url(repo_context), headers={ "Content-Type": "application/json", "PRIVATE-TOKEN": self.get_token(), }, json={"body": body}, ) if not rr.ok: LOG.debug(rr.json()) except Exception as e: LOG.debug(e)
def execute_default_cmd( cmd_map_list, type_str, tool_name, src, reports_dir, convert, scan_mode, repo_context, ): """ Method to execute default command for the given type Args: cmd_map_list Default commands in the form of a dict (multiple) or list type_str Project type tool_name Tool name src Project dir reports_dir Directory for output reports convert Boolean to enable normalisation of reports json scan_mode Scan mode string repo_context Repo context """ # Check if there is a default command specified for the given type # Create the reports dir os.makedirs(reports_dir, exist_ok=True) report_fname_prefix = os.path.join(reports_dir, tool_name + "-report") default_cmd = " ".join(cmd_map_list) % dict( src=src, reports_dir=reports_dir, report_fname_prefix=report_fname_prefix, type=type_str, scan_mode=scan_mode, ) # Try to detect if the output could be json outext = ".out" if default_cmd.find("json") > -1: outext = ".json" if default_cmd.find("csv") > -1: outext = ".csv" if default_cmd.find("sarif") > -1: outext = ".sarif" report_fname = report_fname_prefix + outext # If the command doesn't support file output then redirect stdout automatically stdout = None if reports_dir and default_cmd.find(report_fname_prefix) == -1: report_fname = report_fname_prefix + outext stdout = io.open(report_fname, "w") LOG.debug("Output will be written to {}".format(report_fname)) # If the command is requesting list of files then construct the argument filelist_prefix = "(filelist=" if default_cmd.find(filelist_prefix) > -1: si = default_cmd.find(filelist_prefix) ei = default_cmd.find(")", si + 10) ext = default_cmd[si + 10 : ei] filelist = utils.find_files(src, ext) delim = " " default_cmd = default_cmd.replace( filelist_prefix + ext + ")", delim.join(filelist) ) cmd_with_args = default_cmd.split(" ") exec_tool(cmd_with_args, cwd=src, stdout=stdout) # Should we attempt to convert the report to sarif format if ( convert and config.tool_purpose_message.get(cmd_with_args[0]) and os.path.isfile(report_fname) ): crep_fname = utils.get_report_file( tool_name, reports_dir, convert, ext_name="sarif" ) convertLib.convert_file( cmd_with_args[0], cmd_with_args[1:], src, report_fname, crep_fname, ) try: if not os.environ.get("SCAN_DEBUG_MODE") == "debug": os.remove(report_fname) except Exception: LOG.debug("Unable to remove file {}".format(report_fname)) elif type_str == "depscan": # Convert depscan and license scan files to html depscan_files = utils.find_files(reports_dir, "depscan", True) for df in depscan_files: if not df.endswith(".html"): depscan_data = grafeas.parse(df) if depscan_data and len(depscan_data): html_fname = df.replace(".json", ".html") grafeas.render_html(depscan_data, html_fname) track( {"id": config.get("run_uuid"), "depscan_summary": depscan_data} ) LOG.debug( "Depscan and HTML report written to file: %s, %s 👍", df, html_fname, ) licence_files = utils.find_files(reports_dir, "license", True) for lf in licence_files: if not lf.endswith(".html"): licence_data = licence.parse(lf) if licence_data and len(licence_data): html_fname = lf.replace(".json", ".html") licence.render_html(licence_data, html_fname) track( {"id": config.get("run_uuid"), "license_summary": licence_data} ) LOG.debug( "License check and HTML report written to file: %s, %s 👍", lf, html_fname, )
def deep_analysis(src, files):
    has_unsanitised_vulnerabilities = False
    cfg_list = list()
    insights = []
    framework_route_criteria = is_taintable_function
    for path in sorted(files, key=os.path.dirname, reverse=True):
        directory = os.path.dirname(path)
        project_modules = get_modules(directory, prepend_module_root=False)
        local_modules = get_directory_modules(directory)
        LOG.debug(f"Generating AST and CFG for {path}")
        try:
            tree = generate_ast(path)
            if not tree:
                continue
        except Exception as e:
            LOG.debug(e)
        try:
            violations = find_insights(tree, path)
            if violations:
                insights += violations
            cfg = make_cfg(
                tree,
                project_modules,
                local_modules,
                path,
                allow_local_directory_imports=True,
            )
            cfg_list.append(cfg)
        except Exception as e:
            LOG.debug(e)
    try:
        # Taint all possible entry points
        LOG.debug("Determining taints")
        FrameworkAdaptor(
            cfg_list, project_modules, local_modules, framework_route_criteria
        )
        LOG.debug("Building constraints table")
        initialize_constraint_table(cfg_list)
        LOG.debug("About to begin deep analysis")
        analyse(cfg_list)
    except Exception as e:
        LOG.debug(e)
    LOG.debug("Finding vulnerabilities from the graph")
    vulnerabilities = find_vulnerabilities(
        cfg_list,
        default_blackbox_mapping_file,
        default_trigger_word_file,
    )
    if vulnerabilities:
        has_unsanitised_vulnerabilities = any(
            not isinstance(v, SanitisedVulnerability) for v in vulnerabilities
        )
    return vulnerabilities, insights, has_unsanitised_vulnerabilities
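def _example_deep_analysis(src):
    """Hedged driver sketch for deep_analysis(); the glob-based file discovery
    is a stand-in for however the caller actually collects Python sources."""
    import glob

    py_files = glob.glob(os.path.join(src, "**", "*.py"), recursive=True)
    vulnerabilities, insights, has_unsanitised = deep_analysis(src, py_files)
    LOG.info(
        "Found %d potential vulnerabilities and %d insights (unsanitised: %s)",
        len(vulnerabilities),
        len(insights),
        has_unsanitised,
    )
    return vulnerabilities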
def convert_sarif(app_name, repo_context, sarif_files, findings_fname): """ Method to convert sarif to findings json :param app_name: Application name :param sarif_file: :param findings_fname: :return: """ finding_id = 1 findings_list = [] with open(findings_fname, mode="w") as out_file: for sf in sarif_files: with open(sf, mode="r") as report_file: report_data = json.loads(report_file.read()) # skip this file if the data is empty if not report_data or not report_data.get("runs"): continue # Iterate through all the runs for run in report_data["runs"]: results = run.get("results") if not results: continue rules = { r["id"]: r for r in run.get("tool", {}).get("driver", {}).get( "rules") if r and r.get("id") } for result in results: rule = rules.get(result.get("ruleId")) if not rule: continue for location in result.get("locations"): filename = location["physicalLocation"][ "artifactLocation"]["uri"] lineno = location.get("physicalLocation", {})["region"]["startLine"] finding = { "app": app_name, "type": "extscan", "title": result.get("message", {}).get("text"), "description": rule.get("fullDescription", {}).get("text"), "internal_id": "{}/{}".format( result["ruleId"], utils.calculate_line_hash( filename, lineno, location.get( "physicalLocation", {})["region"]["snippet"]["text"], ), ), "severity": convert_severity( result.get("properties", {})["issue_severity"]), "owasp_category": "", "category": result["ruleId"], "details": { "repoContext": repo_context, "name": result.get("message", {})["text"], "tags": ",".join(rule["properties"]["tags"]), "fileName": filename, "DATA_TYPE": "OSS_SCAN", "lineNumber": lineno, "ruleId": result["ruleId"], "ruleName": rule.get("name"), "snippetText": location.get( "physicalLocation", {})["region"]["snippet"]["text"], "contextText": location.get("physicalLocation", {}) ["contextRegion"]["snippet"]["text"], }, } findings_list.append(finding) finding_id = finding_id + 1 try: json.dump({"findings": findings_list}, out_file) except Exception: LOG.debug("Unable to convert the run to findings format")
def summary(sarif_files, aggregate_file=None, override_rules={}):
    """Generate overall scan summary based on the generated SARIF file

    :param sarif_files: List of generated sarif report files
    :param aggregate_file: Filename to store aggregate data
    :param override_rules Build break rules to override for testing

    :returns dict representing the summary
    """
    report_summary = {}
    build_status = "pass"
    # This is the list of all runs which will get stored as an aggregate
    run_data_list = []
    for sf in sarif_files:
        with open(sf, mode="r") as report_file:
            report_data = json.loads(report_file.read())
            # skip this file if the data is empty
            if not report_data or not report_data.get("runs"):
                LOG.warn("Report file {} is invalid. Skipping ...".format(sf))
                continue
            # Iterate through all the runs
            for run in report_data["runs"]:
                # Add it to the run data list for aggregation
                run_data_list.append(run)
                tool_desc = run["tool"]["driver"]["name"]
                tool_name = tool_desc
                # Initialise
                report_summary[tool_name] = {
                    "tool": tool_desc,
                    "critical": 0,
                    "high": 0,
                    "medium": 0,
                    "low": 0,
                    "status": "✅",
                }
                results = run.get("results", [])
                metrics = run.get("properties", {}).get("metrics", None)
                # If the result includes metrics use it. If not compute it
                if metrics:
                    report_summary[tool_name].update(metrics)
                    report_summary[tool_name].pop("total", None)
                else:
                    for aresult in results:
                        sev = aresult["properties"]["issue_severity"].lower()
                        report_summary[tool_name][sev] += 1
                # Compare against the build break rule to determine status
                default_rules = config.get("build_break_rules").get("default")
                tool_rules = config.get("build_break_rules").get(tool_name, {})
                build_break_rules = {**default_rules, **tool_rules, **override_rules}
                for rsev in ["critical", "high", "medium", "low"]:
                    if build_break_rules.get("max_" + rsev) is not None:
                        if (
                            report_summary.get(tool_name).get(rsev)
                            > build_break_rules["max_" + rsev]
                        ):
                            report_summary[tool_name]["status"] = "❌"
                            build_status = "fail"
    # Should we store the aggregate data
    if aggregate_file:
        # agg_sarif_file = aggregate_file.replace(".json", ".sarif")
        # aggregate.sarif_aggregate(run_data_list, agg_sarif_file)
        aggregate.jsonl_aggregate(run_data_list, aggregate_file)
        LOG.debug("Aggregate report written to {}\n".format(aggregate_file))
    return report_summary, build_status
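# Hedged sketch of the build_break_rules structure summary() reads from
# config; the "default" block and the "max_<severity>" keys come from the
# code above, the per-tool override shown here is an assumption.
SAMPLE_BUILD_BREAK_RULES = {
    "default": {"max_critical": 0, "max_high": 2, "max_medium": 5, "max_low": 10},
    "source-python": {"max_high": 0},  # hypothetical stricter rule for one tool
}
# summary() merges the default rules, any per-tool rules and override_rules,
# then flips the tool status and the overall build_status to "fail" whenever a
# severity count exceeds its max_ threshold.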
def extract_from_file(tool_name, working_dir, report_file, file_path_list=None): """Extract properties from reports :param tool_name: tool name :param working_dir: Working directory :param report_file: Report file :param file_path_list: Full file path for any manipulation :return issues, metrics, skips information """ issues = [] metrics = None skips = [] # If the tools did not produce any result do not crash if not os.path.isfile(report_file): return issues, metrics, skips extn = pathlib.PurePosixPath(report_file).suffix with io.open(report_file, "r") as rfile: # Static check use jsonlines format, duh if tool_name == "staticcheck": contents = rfile.read() try: issues = [ json.loads(str(item)) for item in contents.strip().split("\n") ] except json.decoder.JSONDecodeError: LOG.warning( "staticcheck produced no result since the project was not built before analysis!" ) return issues, metrics, skips if extn == ".json": try: report_data = json.loads(rfile.read()) except json.decoder.JSONDecodeError: return issues, metrics, skips # Inspect uses vulnerabilities if tool_name == "inspect": file_name_prefix = "" for v in report_data.get("vulnerabilities"): if not v: continue vuln = v["vulnerability"] location = {} if vuln.get("dataFlow") and vuln.get("dataFlow").get( "dataFlow"): for l in vuln["dataFlow"]["dataFlow"]["list"]: if not is_generic_package( l["location"].get("fileName")): location = l["location"] break fileName = location.get("fileName") if fileName == "N/A": continue if not file_name_prefix: file_name_prefix = find_path_prefix( working_dir, fileName) issues.append({ "rule_id": vuln["category"], "title": vuln["title"], "description": vuln["description"], "score": vuln["score"], "severity": vuln["severity"], "line_number": location.get("lineNumber"), "filename": os.path.join(file_name_prefix, fileName), "first_found": vuln["firstVersionDetected"], "issue_confidence": "HIGH", }) elif isinstance(report_data, list): issues = report_data else: if tool_name == "checkov": issues = report_data.get("results", {}).get("failed_checks") elif "sec_issues" in report_data: # NodeJsScan uses sec_issues sec_data = report_data["sec_issues"] for key, value in sec_data.items(): if isinstance(value, list): issues = issues + value else: issues.append(value) elif "Issues" in report_data: tmpL = report_data.get("Issues", []) if tmpL: issues += tmpL else: LOG.debug("%s produced no result" % tool_name) elif "results" in report_data: tmpL = report_data.get("results", []) if tmpL: issues += tmpL else: LOG.debug("%s produced no result" % tool_name) if extn == ".csv": headers, issues = csv_parser.get_report_data(rfile) if extn == ".xml": issues, metrics = xml_parser.get_report_data(rfile, file_path_list) return issues, metrics, skips
def extract_from_file( tool_name, tool_args, working_dir, report_file, file_path_list=None ): """Extract properties from reports :param tool_name: tool name :param tool_args: tool args :param working_dir: Working directory :param report_file: Report file :param file_path_list: Full file path for any manipulation :return issues, metrics, skips information """ issues = [] metrics = None skips = [] # If the tools did not produce any result do not crash if not os.path.isfile(report_file): return issues, metrics, skips extn = pathlib.PurePosixPath(report_file).suffix with io.open(report_file, "r") as rfile: # Static check use jsonlines format, duh if tool_name == "staticcheck": contents = rfile.read() try: issues = [ json.loads(str(item)) for item in contents.strip().split("\n") ] except json.decoder.JSONDecodeError: LOG.warning( "staticcheck produced no result since the project was not built before analysis!" ) return issues, metrics, skips if extn == ".json": try: report_data = json.loads(rfile.read()) except json.decoder.JSONDecodeError: return issues, metrics, skips # NG SAST (Formerly Inspect) uses vulnerabilities if tool_name == "ng-sast": for v in report_data.get("vulnerabilities"): if not v: continue vuln = v["vulnerability"] location_list = [] if vuln.get("dataFlow") and vuln.get("dataFlow", {}).get( "dataFlow" ): location_list = convert_dataflow( working_dir, tool_args, vuln["dataFlow"]["dataFlow"]["list"] ) for location in location_list: issues.append( { "rule_id": vuln["category"], "title": vuln["title"], "description": vuln["description"], "score": vuln["score"], "severity": vuln["severity"], "line_number": location.get("line_number"), "filename": location.get("filename"), "first_found": vuln["firstVersionDetected"], "issue_confidence": "HIGH", } ) elif tool_name == "taint-php": for entry in report_data: taint_trace = entry.get("taint_trace") labels = [] if taint_trace: source, sink, labels = get_from_taints(taint_trace) else: source, _, _ = get_from_taints([entry]) issues.append( { "rule_id": entry.get("shortcode"), "test_name": entry.get("type"), "description": "{}: {}".format( entry.get("message"), "\\n".join(labels) ), "link": entry.get("link"), "severity": entry.get("severity"), "issue_confidence": "HIGH", "line_number": source.get("line_number"), "filename": source.get("filename"), } ) elif tool_name == "taint-python": taint_list = report_data.get("vulnerabilities") for taint in taint_list: source = taint.get("source") sink = taint.get("sink") tags = {} for taint_props in [ "source_trigger_word", "source_label", "source_type", "sink_trigger_word", "sink_label", "sink_type", ]: if taint.get(taint_props): tags[taint_props] = taint.get(taint_props) issues.append( { "rule_id": taint.get("rule_id"), "test_name": taint.get("rule_name"), "short_description": taint.get("short_description"), "cwe_category": taint.get("cwe_category"), "owasp_category": taint.get("owasp_category"), "description": taint.get("description"), "severity": taint.get("severity"), "issue_confidence": "HIGH", "line_from": source.get("line_number"), "line_to": sink.get("line_number"), "filename": source.get("path"), "tags": tags, } ) elif tool_name == "phpstan" or tool_name == "source-php": file_errors = report_data.get("files") for filename, messageobj in file_errors.items(): messages = messageobj.get("messages") for msg in messages: # Create a rule id for phpstan rule_word = msg.get("message", "").split(" ")[0] rule_word = "phpstan-" + rule_word.lower() issues.append( { "rule_id": rule_word, "title": 
msg.get("message"), "line_number": msg.get("line"), "filename": filename, "severity": "LOW", "issue_confidence": "MEDIUM", } ) elif tool_name == "source-js": njs_findings = report_data.get("nodejs", {}) njs_findings.update(report_data.get("templates", {})) for k, v in njs_findings.items(): # Password detection by njsscan is full of false positives if k == "node_password": continue files = v.get("files", []) metadata = v.get("metadata", {}) if not files or not metadata: continue for afile in files: line_number = 0 if afile.get("match_lines"): line_number = afile.get("match_lines")[0] issues.append( { "rule_id": metadata.get("owasp") .replace(":", "-") .replace(" ", "") .lower(), "title": metadata.get("cwe"), "description": metadata.get("description"), "severity": metadata.get("severity"), "line_number": line_number, "filename": afile.get("file_path"), "issue_confidence": "HIGH", } ) elif tool_name == "checkov": if isinstance(report_data, list): for rd in report_data: issues += rd.get("results", {}).get("failed_checks") else: issues = report_data.get("results", {}).get("failed_checks") elif tool_name == "source-ruby": issues = report_data.get("warnings", []) issues += report_data.get("errors", []) elif isinstance(report_data, list): issues = report_data else: if "sec_issues" in report_data: # NodeJsScan uses sec_issues sec_data = report_data["sec_issues"] for key, value in sec_data.items(): if isinstance(value, list): issues = issues + value else: issues.append(value) elif "Issues" in report_data: tmpL = report_data.get("Issues", []) if tmpL: issues += tmpL else: LOG.debug("%s produced no result" % tool_name) elif "results" in report_data: tmpL = report_data.get("results", []) if tmpL: issues += tmpL else: LOG.debug("%s produced no result" % tool_name) if extn == ".csv": headers, issues = csv_parser.get_report_data(rfile) if extn == ".xml": issues, metrics = xml_parser.get_report_data( rfile, file_path_list=file_path_list, working_dir=working_dir ) return issues, metrics, skips
def summary(sarif_files, depscan_files=None, aggregate_file=None, override_rules={}): """Generate overall scan summary based on the generated SARIF file :param sarif_files: List of generated sarif report files :param aggregate_file: Filename to store aggregate data :param override_rules Build break rules to override for testing :returns dict representing the summary """ report_summary = {} build_status = "pass" # This is the list of all runs which will get stored as an aggregate run_data_list = [] default_rules = config.get("build_break_rules").get("default") depscan_default_rules = config.get("build_break_rules").get("depscan") # Collect stats from depscan files if available if depscan_files: for df in depscan_files: with open(df, mode="r") as drep_file: dep_data = get_depscan_data(drep_file) if not dep_data: continue # depscan-java or depscan-nodejs based on filename dep_type = (os.path.basename(df).replace(".json", "").replace( "-report", "")) metrics, required_pkgs_found = calculate_depscan_metrics( dep_data) report_summary[dep_type] = { "tool": f"""Dependency Scan ({dep_type.replace("depscan-", "")})""", "critical": metrics["critical"], "high": metrics["high"], "medium": metrics["medium"], "low": metrics["low"], "status": ":white_heavy_check_mark:", } report_summary[dep_type].pop("total", None) # Compare against the build break rule to determine status dep_tool_rules = config.get("build_break_rules").get( dep_type, {}) build_break_rules = {**depscan_default_rules, **dep_tool_rules} if override_rules and override_rules.get("depscan"): build_break_rules = { **build_break_rules, **override_rules.get("depscan"), } # Default severity categories for build status build_status_categories = ( "critical", "required_critical", "optional_critical", "high", "required_high", "optional_high", "medium", "required_medium", "optional_medium", "low", "required_low", "optional_low", ) # Issue 233 - Consider only required packages if available if required_pkgs_found: build_status_categories = ( "required_critical", "required_high", "required_medium", "required_low", ) for rsev in build_status_categories: if build_break_rules.get("max_" + rsev) is not None: if metrics.get(rsev) > build_break_rules["max_" + rsev]: report_summary[dep_type]["status"] = ":cross_mark:" build_status = "fail" for sf in sarif_files: with open(sf, mode="r") as report_file: report_data = json.load(report_file) # skip this file if the data is empty if not report_data or not report_data.get("runs"): LOG.warn("Report file {} is invalid. Skipping ...".format(sf)) continue # Iterate through all the runs for run in report_data["runs"]: # Add it to the run data list for aggregation run_data_list.append(run) tool_desc = run["tool"]["driver"]["name"] tool_name = tool_desc # Initialise report_summary[tool_name] = { "tool": tool_desc, "critical": 0, "high": 0, "medium": 0, "low": 0, "status": ":white_heavy_check_mark:", } results = run.get("results", []) metrics = run.get("properties", {}).get("metrics", None) # If the result includes metrics use it. 
If not compute it if metrics: report_summary[tool_name].update(metrics) report_summary[tool_name].pop("total", None) else: for aresult in results: sev = aresult["properties"]["issue_severity"].lower() report_summary[tool_name][sev] += 1 # Compare against the build break rule to determine status tool_rules = config.get("build_break_rules").get(tool_name, {}) build_break_rules = { **default_rules, **tool_rules, **override_rules } for rsev in ("critical", "high", "medium", "low"): if build_break_rules.get("max_" + rsev) is not None: if (report_summary.get(tool_name).get(rsev) > build_break_rules["max_" + rsev]): report_summary[tool_name][ "status"] = ":cross_mark:" build_status = "fail" # Should we store the aggregate data if aggregate_file: # agg_sarif_file = aggregate_file.replace(".json", ".sarif") # aggregate.sarif_aggregate(run_data_list, agg_sarif_file) aggregate.jsonl_aggregate(run_data_list, aggregate_file) LOG.debug("Aggregate report written to {}\n".format(aggregate_file)) return report_summary, build_status
def inspect_scan(language, src, reports_dir, convert, repo_context): """ Method to perform inspect cloud scan Args: language Project language src Project dir reports_dir Directory for output reports convert Boolean to enable normalisation of reports json repo_context Repo context """ run_uuid = config.get("run_uuid") cpg_mode = config.get("SHIFTLEFT_CPG") env = os.environ.copy() env["SCAN_JAVA_HOME"] = os.environ.get("SCAN_JAVA_8_HOME") report_fname = utils.get_report_file( "ng-sast", reports_dir, convert, ext_name="json" ) sl_cmd = config.get("SHIFTLEFT_NGSAST_CMD") # Check if sl cli is available if not utils.check_command(sl_cmd): LOG.warning( "sl cli is not available. Please check if your build uses shiftleft/scan-java as the image" ) return analyze_files = config.get("SHIFTLEFT_ANALYZE_FILE") analyze_target_dir = config.get( "SHIFTLEFT_ANALYZE_DIR", os.path.join(src, "target") ) extra_args = None if not analyze_files: if language == "java": analyze_files = utils.find_java_artifacts(analyze_target_dir) elif language == "csharp": if not utils.check_dotnet(): LOG.warning( "dotnet is not available. Please check if your build uses shiftleft/scan-csharp as the image" ) return analyze_files = utils.find_csharp_artifacts(src) cpg_mode = True else: if language == "ts" or language == "nodejs": language = "js" extra_args = ["--", "--ts", "--babel"] analyze_files = [src] cpg_mode = True app_name = find_app_name(src, repo_context) branch = repo_context.get("revisionId") if not branch: branch = "master" if not analyze_files: LOG.warning( "Unable to find any build artifacts. Compile your project first before invoking scan or use the auto build feature." ) return if isinstance(analyze_files, list) and len(analyze_files) > 1: LOG.warning( "Multiple files found in {}. Only {} will be analyzed".format( analyze_target_dir, analyze_files[0] ) ) analyze_files = analyze_files[0] sl_args = [ sl_cmd, "analyze", "--no-auto-update" if language == "java" else None, "--wait", "--cpg" if cpg_mode else None, "--" + language, "--tag", "branch=" + branch, "--app", app_name, ] sl_args += [analyze_files] if extra_args: sl_args += extra_args sl_args = [arg for arg in sl_args if arg is not None] LOG.info( "About to perform ShiftLeft NG SAST cloud analysis. This might take a few minutes ..." ) LOG.debug(" ".join(sl_args)) LOG.debug(repo_context) cp = exec_tool("NG SAST", sl_args, src, env=env) if cp.returncode != 0: LOG.warning("NG SAST cloud analyze has failed with the below logs") LOG.debug(sl_args) LOG.info(cp.stderr) return findings_data = fetch_findings(app_name, branch, report_fname) if findings_data and convert: crep_fname = utils.get_report_file( "ng-sast", reports_dir, convert, ext_name="sarif" ) convertLib.convert_file("ng-sast", sl_args[1:], src, report_fname, crep_fname) track({"id": run_uuid, "scan_mode": "ng-sast", "sl_args": sl_args})
def annotate_pr(self, repo_context, findings_file, report_summary, build_status): if not findings_file: return with open(findings_file, mode="r") as fp: try: findings_obj = json.load(fp) findings = findings_obj.get("findings") if not findings: LOG.debug("No findings from scan available to report") return context = self.get_context(repo_context) # Leave a comment on the pull request if context.get("prID") and context.get("bitbucketToken"): summary = "| Tool | Critical | High | Medium | Low | Status |\n" summary = ( summary + "| ---- | ------- | ------ | ----- | ---- | ---- |\n") for rk, rv in report_summary.items(): status_emoji = self.to_emoji(rv.get("status")) summary = f'{summary}| {rv.get("tool")} | {rv.get("critical")} | {rv.get("high")} | {rv.get("medium")} | {rv.get("low")} | {status_emoji} |\n' template = config.get("PR_COMMENT_BASIC_TEMPLATE") recommendation = ( f"Please review the scan reports before approving this pull request for {context.get('prTargetBranch')} branch" if build_status == "fail" else "Looks good") repoOwner = f"{context.get('BITBUCKET_REPO_OWNER')}" repoFullname = f"{context.get('BITBUCKET_REPO_FULL_NAME')}" repoWorkspace = f"{context.get('BITBUCKET_WORKSPACE')}" repoUUID = f"{context.get('BITBUCKET_REPO_UUID')}" prID = f"{context.get('BITBUCKET_PR_ID')}" prTargetBranch = f"{context.get('BITBUCKET_PR_DESTINATION_BRANCH')}" bitbucketToken = f"{context.get('BITBUCKET_TOKEN')}" commitSHA = f"{context.get('BITBUCKET_COMMIT')}" repoId = f"{context.get('BITBUCKET_REPO_UUID')}" projectUrl = f"{context.get('BITBUCKET_REPO_SLUG')}" jobId = f"{context.get('BITBUCKET_BUILD_NUMBER')}" body = template % dict( summary=summary, recommendation=recommendation, repoOwner=repoOwner, repoFullname=repoFullname, repoWorkspace=repoWorkspace, repoUUID=repoUUID, prID=prID, prTargetBranch=prTargetBranch, bitbucketToken=bitbucketToken, commitSHA=commitSHA, repoId=repoId, projectUrl=projectUrl, jobId=jobId, ) rc = requests.post( self.get_pr_comments_url(repo_context), auth=( context.get("repoWorkspace"), context.get("bitbucketToken"), ), headers={"Content-Type": "application/json"}, json={"content": { "raw": body }}, ) if not rc.ok: LOG.debug(rc.json()) else: LOG.debug( "Either build is not part of a PR or variable BITBUCKET_TOKEN was not set with Pull Request write permission" ) total_count = len(findings) data_list = [ { "title": "Safe to merge?", "type": "BOOLEAN", "value": build_status != "fail", }, ] for rk, rv in report_summary.items(): data_list.append({ "title": rv.get("tool"), "type": "TEXT", "value": rv.get("status"), }) scan_id = config.get("run_uuid", "001") # Create a PR report based on the total findings rr = requests.put( f"{self.get_reports_url(repo_context)}-{scan_id}", proxies=proxies, headers={"Content-Type": "application/json"}, json={ "title": "Scan", "details": f"This pull request contains {total_count} issues", "report_type": "SECURITY", "reporter": f"Scan report for {repo_context.get('repositoryName')}", "link": "https://slscan.io", "logo_url": "https://www.shiftleft.io/static/images/ShiftLeft_logo_white.svg", "result": "FAILED" if build_status == "fail" else "PASSED", "data": data_list, }, ) if rr.ok: for f in findings: finternal = f.get("internal_id") tmpA = finternal.split("/") title = tmpA[0] occurrenceHash = tmpA[-1] annotation_url = f"{self.get_reports_url(repo_context)}-{scan_id}/annotations/scan-{occurrenceHash}" fileName = "" lineNumber = None if f.get("details"): fileName = f.get("details", {}).get("fileName") lineNumber = f.get("details", 
{}).get("lineNumber") workspace = utils.get_workspace(repo_context) # Remove the workspace if workspace: workspace = workspace + "/" fileName = fileName.replace(workspace, "") # Cleanup title and description title = f.get("title") description = f.get("description") if len(title) > len(description) and "\n" in title: description = f.get("title") if "\n" in title: title = title.split("\n")[0] annotation = { "title": "Scan Report", "annotation_type": "VULNERABILITY", "summary": title, "details": description, "severity": self.convert_severity(f.get("severity")), "path": fileName, "line": lineNumber, } ar = requests.put( annotation_url, proxies=proxies, headers={"Content-Type": "application/json"}, json=annotation, ) if not ar.ok: break else: LOG.debug(rr.json()) except Exception as e: LOG.debug(e)
def fetch_findings(app_name, version, report_fname):
    """
    Fetch findings from the NG SAST Cloud
    """
    sl_org = config.get("SHIFTLEFT_ORG_ID", config.get("SHIFTLEFT_ORGANIZATION_ID"))
    sl_org_token = config.get(
        "SHIFTLEFT_ORG_TOKEN", config.get("SHIFTLEFT_ORGANIZATION_TOKEN")
    )
    if not sl_org_token:
        sl_org_token = config.get("SHIFTLEFT_API_TOKEN")
    findings_api = config.get("SHIFTLEFT_VULN_API")
    findings_list = []
    if sl_org and sl_org_token:
        findings_api = findings_api % dict(
            sl_org=sl_org, app_name=app_name, version=version
        )
        query_obj = {
            "query": {
                "returnRuntimeData": False,
                "orderByDirection": "VULNERABILITY_ORDER_DIRECTION_DESC",
            }
        }
        headers = {
            "Content-Type": "application/json",
            "Authorization": "Bearer " + sl_org_token,
        }
        try:
            r = requests.post(findings_api, headers=headers, json=query_obj)
            if r.status_code == 200:
                findings_data = r.json()
                if findings_data:
                    findings_list += findings_data.get("vulnerabilities", [])
                    nextPageBookmark = findings_data.get("nextPageBookmark")
                    # Recurse and fetch all pages
                    while nextPageBookmark:
                        LOG.debug("Retrieving findings from next page")
                        r = requests.post(
                            findings_api,
                            headers=headers,
                            json={"pageBookmark": nextPageBookmark},
                        )
                        if r.status_code == 200:
                            findings_data = r.json()
                            if findings_data:
                                findings_list += findings_data.get(
                                    "vulnerabilities", []
                                )
                                nextPageBookmark = findings_data.get("nextPageBookmark")
                            else:
                                nextPageBookmark = None
                with open(report_fname, mode="w") as rp:
                    json.dump({"vulnerabilities": findings_list}, rp)
                    LOG.debug(
                        "Data written to {}, {}".format(
                            report_fname, len(findings_list)
                        )
                    )
                return findings_list
            else:
                if not findings_list:
                    LOG.warning(
                        "Unable to retrieve any findings from NG SAST Cloud. Status {}".format(
                            r.status_code
                        )
                    )
                else:
                    LOG.debug(
                        "Unable to retrieve some findings from NG SAST Cloud. Proceeding with partial list. Status {}".format(
                            r.status_code
                        )
                    )
                return findings_list
        except Exception as e:
            LOG.error(e)
    else:
        return findings_list
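def _example_fetch_findings(reports_dir):
    """Hedged usage sketch; the app name and branch are placeholders and the
    call assumes the SHIFTLEFT_* org id/token configuration is already set."""
    report_fname = os.path.join(reports_dir, "ng-sast-report.json")
    findings = fetch_findings("shopping-cart", "master", report_fname)
    # fetch_findings returns an empty list when credentials are missing and
    # may return None if the API call raised, so guard before using len().
    return len(findings) if findings else 0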
parser.add_option("-g", "--ga-conf", dest="ga_conf", action="store", help="Configuration file of the" \ "genetic algorithm") parser.add_option("-e", "--es-conf", dest="es_conf", action="store", help="Configuration file of the evolutionary strategy") opts = parser.parse_args()[0] try: if len(sys.argv) < 2: raise OptsError("Missing arguments") if opts.ga_conf: algorithm_name = 'GA behavior' LOG.info("Starting GA") pop_size, term, ad_mut_stp, mu_lambda = read_algorithm_config(opts.ga_conf) search_ga(int(term), int(pop_size), ast.literal_eval(ad_mut_stp), ast.literal_eval((mu_lambda))) plot_data(algorithm_name, LOG_NAME) LOG.info("Finish GA") if opts.es_conf: algorithm_name = 'ES behavior' LOG.info("Starting ES") pop_range, term, ad_mut_stp, mu_lambda = read_algorithm_config(opts.es_conf) search_es(int(term), int(pop_range), ast.literal_eval(ad_mut_stp), ast.literal_eval((mu_lambda))) plot_data(algorithm_name, LOG_NAME) LOG.info("Finish ES")