Exemplo n.º 1
0
def dump_parsed_ast(
    to_json: bool, language: str, pattern: Optional[str], targets_str: List[str]
) -> None:
    """
    Print the AST dump that semgrep-core produces for a pattern or a target.

    When ``pattern`` is truthy it is written to a temporary file and dumped
    with ``-dump_pattern``. Otherwise ``targets_str`` must resolve to exactly
    one target, which is dumped with ``-dump_ast``. ``to_json`` prepends
    ``-json`` to the semgrep-core arguments.

    Raises SemgrepError when no pattern is given and there is not exactly one
    target, or when the semgrep-core call raises CalledProcessError.
    """
    resolved_targets = semgrep.config_resolver.resolve_targets(targets_str)

    # NamedTemporaryFile removes the file when the `with` block exits, so the
    # semgrep-core invocation has to happen inside it.
    with tempfile.NamedTemporaryFile("w") as pattern_file:
        if pattern:
            pattern_file.write(pattern)
            pattern_file.flush()
            dump_args = ["-dump_pattern", pattern_file.name]
        elif len(resolved_targets) == 1:
            dump_args = ["-dump_ast", str(resolved_targets[0])]
        else:
            raise SemgrepError("--dump-ast requires exactly one target file")

        json_args = ["-json"] if to_json else []
        cmd = [SEMGREP_PATH, *json_args, "-lang", language, *dump_args]
        try:
            output = sub_check_output(cmd)
        except subprocess.CalledProcessError as ex:
            raise SemgrepError(
                f"error invoking semgrep with:\n\t{' '.join(cmd)}\n\t{ex}\n{PLEASE_FILE_ISSUE_TEXT}"
            )
        print(output.decode(errors="replace"))
Exemplo n.º 2
0
    def _raise_semgrep_error_from_json(
        self, error_json: Dict[str, Any], patterns: List[Pattern],
    ) -> None:
        """
        Turn an error payload reported by semgrep-core into a raised exception.

        Every path through this method raises: SemgrepError for an invalid
        language, for a pattern id that cannot be mapped back to a span, and
        for any other error type; InvalidPatternError for an invalid pattern
        whose span is known.

        See format_output_exception in semgrep O'Caml for details on schema
        """
        error_type = error_json["error"]

        if error_type == "invalid language":
            raise SemgrepError(
                f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
            )

        if error_type != "invalid pattern":
            # No special formatting ought to be required for the other types;
            # the semgrep python should be performing validation for them. So
            # if any other type of error occurs, ask the user to file an issue.
            raise SemgrepError(
                f'an internal error occured while invoking semgrep-core:\n\t{error_type}: {error_json.get("message", "no message")}\n{PLEASE_FILE_ISSUE_TEXT}'
            )

        # Invalid pattern: find the first pattern whose id matches the one
        # semgrep-core complained about, so the error can point at its span.
        offending_pattern = None
        for candidate in patterns:
            if candidate._id == error_json["pattern_id"]:
                offending_pattern = candidate
                break

        if offending_pattern is None or offending_pattern.span is None:
            raise SemgrepError(
                f"Pattern id from semgrep-core was missing in pattern spans. {PLEASE_FILE_ISSUE_TEXT}"
            )

        raise InvalidPatternError(
            short_msg=error_type,
            long_msg=f"Pattern could not be parsed as a {error_json['language']} semgrep pattern",
            spans=[offending_pattern.span],
            help=None,
        )
Exemplo n.º 3
0
def get_config(
        pattern: str, lang: str,
        config: str) -> Tuple[Dict[str, List[Rule]], List[SemgrepError]]:
    """
    Build the rule configuration from either an inline pattern or a config.

    With a truthy ``pattern`` a manual config is generated (``lang`` is then
    mandatory). Otherwise ``config`` is resolved; a SemgrepError raised while
    resolving is returned as ``({}, [error])`` instead of propagating.

    Returns the validated configs together with the validation errors.
    Raises SemgrepError when a pattern is given without a language, or when
    no config could be found at all.
    """
    if not pattern:
        # A config is a dict from config_id -> config. Config Id is not well
        # defined at this point.
        try:
            configs = semgrep.config_resolver.resolve_config(config)
        except SemgrepError as resolve_error:
            return {}, [resolve_error]
    elif lang:
        # TODO for now we generate a manual config. Might want to just call semgrep -e ... -l ...
        configs = semgrep.config_resolver.manual_config(pattern, lang)
    else:
        raise SemgrepError(
            "language must be specified when a pattern is passed")

    # Nothing to run with: fail and tell the user how to obtain a config.
    if not configs:
        raise SemgrepError(
            f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
        )

    validated, validation_errors = validate_configs(configs)
    return validated, validation_errors
Exemplo n.º 4
0
def load_config_from_local_path(
    location: Optional[str] = None, ) -> Dict[str, YamlTree]:
    """
    Load configuration from the local filesystem.

    ``location`` is resolved against ``get_base_path()``. A file is parsed
    directly and a directory is parsed as a config folder. When ``location``
    is None the default config file is tried first, then the default config
    folder, and an empty dict is returned if neither exists.

    Raises SemgrepError when ``location`` does not exist or is neither a
    file nor a directory.
    """
    base_path = get_base_path()

    if location is not None:
        loc = base_path.joinpath(location)
        if not loc.exists():
            addendum = (
                " (since you are running in docker, you cannot specify arbitary paths on the host; they must be mounted into the container)"
                if IN_DOCKER else ""
            )
            raise SemgrepError(
                f"unable to find a config; path `{loc}` does not exist{addendum}"
            )
        if loc.is_file():
            return parse_config_at_path(loc)
        if loc.is_dir():
            return parse_config_folder(loc)
        raise SemgrepError(
            f"config location `{loc}` is not a file or folder!")

    # No explicit location: fall back to the defaults, file before folder.
    default_file = base_path.joinpath(DEFAULT_CONFIG_FILE)
    default_folder = base_path.joinpath(DEFAULT_CONFIG_FOLDER)
    if default_file.exists():
        return parse_config_at_path(default_file)
    if default_folder.exists():
        return parse_config_folder(default_folder, relative=True)
    return {}
Exemplo n.º 5
0
def generate_config() -> None:
    """
    Write a starter rule config to DEFAULT_CONFIG_FILE.

    The template is downloaded from TEMPLATE_YAML_URL; if anything goes
    wrong with the download a built-in fallback template is used instead.

    Raises SemgrepError when the config file already exists or when writing
    it fails.
    """
    import requests  # here for faster startup times

    # Never overwrite a config the user already has.
    if Path(DEFAULT_CONFIG_FILE).exists():
        raise SemgrepError(
            f"{DEFAULT_CONFIG_FILE} already exists. Please remove and try again"
        )

    fallback_template = (
        "rules:\n"
        "  - id: eqeq-is-bad\n"
        "    pattern: $X == $X\n"
        '    message: "$X == $X is a useless equality check"\n'
        "    languages: [python]\n"
        "    severity: ERROR"
    )

    try:
        response = requests.get(TEMPLATE_YAML_URL, timeout=10)
        response.raise_for_status()
        template_str = response.text
    except Exception as download_error:
        # Best effort: any download problem falls back to the bundled template.
        debug_print(str(download_error))
        print_stderr(
            "There was a problem downloading the latest template config. Using fallback template"
        )
        template_str = fallback_template

    try:
        with open(DEFAULT_CONFIG_FILE, "w") as config_file:
            config_file.write(template_str)
            print_stderr(
                f"Template config successfully written to {DEFAULT_CONFIG_FILE}"
            )
    except Exception as write_error:
        raise SemgrepError(str(write_error))
Exemplo n.º 6
0
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[Dict[str, Any]],
    flags: Optional[Dict[str, Any]] = None,
) -> Set[Range]:
    """
    Recursively evaluate a boolean rule expression against candidate ranges.

    Operators outside OPERATORS_WITH_CHILDREN are delegated to
    _evaluate_single_expression. Operators with children evaluate each child
    on a copy of ``ranges_left``, narrow ``ranges_left`` in place with the
    results, and append a step to ``steps_for_debugging``.

    Returns the ranges that remain. Raises SemgrepError when the presence of
    children does not fit the operator, and UnknownOperatorError for an
    operator with children that is neither AND_EITHER nor AND_ALL.
    """
    op = expression.operator
    children = expression.children

    # Leaf operator: no recursion needed.
    if op not in OPERATORS_WITH_CHILDREN:
        if children is not None:
            raise SemgrepError(
                f"operator '{op}' must not have child operators"
            )
        return _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            steps_for_debugging,
            flags=flags,
        )

    if children is None:
        raise SemgrepError(
            f"operator '{op}' must have child operators")

    if op == OPERATORS.AND_EITHER:
        # Evaluate every alternative independently, then keep only the ranges
        # that also appear in the flattened child results.
        child_results = []
        for child in children:
            child_results.append(
                _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    flags=flags,
                ))
        ranges_left.intersection_update(flatten(child_results))
    elif op == OPERATORS.AND_ALL:
        # Intersect eagerly so each child only sees what earlier ones kept.
        for child in children:
            surviving_ranges = _evaluate_expression(
                child,
                pattern_ids_to_pattern_matches,
                ranges_left.copy(),
                steps_for_debugging,
                flags=flags,
            )
            ranges_left.intersection_update(surviving_ranges)
    else:
        raise UnknownOperatorError(
            f"unknown operator {op}")

    logger.debug(f"after filter `{op}`: {ranges_left}")
    debugging_step = {
        "filter": f"{pattern_name_for_operator(op)}",
        "pattern_id": None,
        "ranges": list(ranges_left),
    }
    steps_for_debugging.append(debugging_step)
    return ranges_left
Exemplo n.º 7
0
def _evaluate_expression(
    expression: BooleanRuleExpression,
    pattern_ids_to_pattern_matches: Dict[PatternId, List[PatternMatch]],
    ranges_left: Set[Range],
    steps_for_debugging: List[DebuggingStep],
    allow_exec: bool,
) -> Set[Range]:
    """
    Recursively evaluate a boolean rule expression against candidate ranges.

    Operators outside OPERATORS_WITH_CHILDREN are delegated to
    _evaluate_single_expression. Operators with children evaluate each child
    on a copy of ``ranges_left`` and narrow ``ranges_left`` in place with the
    results. Debugging info is recorded for every expression that evaluates
    without raising.

    Returns the ranges that remain. Raises SemgrepError when the presence of
    children does not fit the operator, and UnknownOperatorError for an
    operator with children that is neither AND_EITHER nor AND_ALL.
    """
    op = expression.operator
    children = expression.children

    if op not in OPERATORS_WITH_CHILDREN:
        # Leaf operator: must stand alone and is evaluated directly.
        if children is not None:
            raise SemgrepError(
                f"operator '{op}' must not have child operators"
            )
        ranges_left = _evaluate_single_expression(
            expression,
            pattern_ids_to_pattern_matches,
            ranges_left,
            allow_exec=allow_exec,
        )
    elif children is None:
        raise SemgrepError(
            f"operator '{op}' must have child operators"
        )
    elif op == OPERATORS.AND_EITHER:
        # Evaluate every alternative independently, then keep only the ranges
        # that also appear in the flattened child results.
        child_results = []
        for child in children:
            child_results.append(
                _evaluate_expression(
                    child,
                    pattern_ids_to_pattern_matches,
                    ranges_left.copy(),
                    steps_for_debugging,
                    allow_exec=allow_exec,
                )
            )
        ranges_left.intersection_update(flatten(child_results))
    elif op == OPERATORS.AND_ALL:
        # Intersect eagerly so each child only sees what earlier ones kept.
        for child in children:
            surviving_ranges = _evaluate_expression(
                child,
                pattern_ids_to_pattern_matches,
                ranges_left.copy(),
                steps_for_debugging,
                allow_exec=allow_exec,
            )
            ranges_left.intersection_update(surviving_ranges)
    else:
        raise UnknownOperatorError(f"unknown operator {op}")

    add_debugging_info(
        expression,
        ranges_left,
        pattern_ids_to_pattern_matches,
        steps_for_debugging,
    )
    return ranges_left
Exemplo n.º 8
0
def get_config(
    pattern: str, lang: str, config_strs: List[str]
) -> Tuple[semgrep.config_resolver.Config, List[SemgrepError]]:
    """
    Build a Config from either an inline pattern or a list of config strings.

    A truthy ``pattern`` takes precedence and requires ``lang``; otherwise
    the config is built from ``config_strs``.

    Returns the config together with the errors collected while building it.
    Raises SemgrepError when a pattern is given without a language, or when
    the resulting config is falsy.
    """
    config_cls = semgrep.config_resolver.Config

    if pattern and not lang:
        raise SemgrepError(
            "language must be specified when a pattern is passed")

    if pattern:
        # TODO for now we generate a manual config. Might want to just call semgrep -e ... -l ...
        config, errors = config_cls.from_pattern_lang(pattern, lang)
    else:
        # A config is a dict from config_id -> config. Config Id is not well
        # defined at this point.
        config, errors = config_cls.from_config_list(config_strs)

    # Nothing to run with: fail and tell the user how to obtain a config.
    if not config:
        raise SemgrepError(
            f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
        )

    return config, errors
Exemplo n.º 9
0
def apply_fixes(rule_matches_by_rule: RuleMatchMap,
                dryrun: bool = False) -> None:
    """
    Modify files in place for all files with findings from rules with an
    autofix configuration.

    For every rule match, a plain ``fix`` takes precedence over a
    ``fix_regex`` mapping (keys ``regex``, ``replacement`` and optional
    ``count``); matches with neither are skipped. With ``dryrun`` set no file
    is written and the fixed lines are stored in
    ``rule_match.extra["fixed_lines"]`` instead.

    Raises SemgrepError when a fix cannot be computed, when ``fix_regex``
    lacks a regex or replacement, or when ``count`` is not an integer.
    """
    modified_files: Set[Path] = set()

    for rule_matches in rule_matches_by_rule.values():
        for rule_match in rule_matches:
            fix = rule_match.fix
            fix_regex = rule_match.fix_regex
            filepath = rule_match.path
            if fix:
                try:
                    fixobj = _basic_fix(rule_match, fix)
                except Exception as e:
                    raise SemgrepError(
                        f"unable to modify file {filepath}: {e}") from e
            elif fix_regex:
                regex = fix_regex.get("regex")
                replacement = fix_regex.get("replacement")
                count = fix_regex.get("count", 0)
                if not regex or not replacement:
                    raise SemgrepError(
                        "'regex' and 'replacement' values required when using 'fix-regex'"
                    )
                try:
                    count = int(count)
                except (TypeError, ValueError) as e:
                    # int() raises TypeError (not ValueError) for values such
                    # as None or a list, which a config file can easily hold.
                    raise SemgrepError(
                        "optional 'count' value must be an integer when using 'fix-regex'"
                    ) from e
                try:
                    fixobj = _regex_replace(rule_match, regex, replacement,
                                            count)
                except Exception as e:
                    # `fix` is always falsy in this branch, so report the
                    # regex and replacement that actually failed.
                    raise SemgrepError(
                        f"unable to use regex to modify file {filepath} with fix-regex '{regex}' -> '{replacement}': {e}"
                    ) from e
            else:
                # No autofix configured for this match.
                continue

            if not dryrun:
                _write_contents(rule_match.path, fixobj.fixed_contents)
                modified_files.add(filepath)
            else:
                rule_match.extra[
                    "fixed_lines"] = fixobj.fixed_lines  # Monkey patch in fixed lines

    num_modified = len(modified_files)
    if modified_files:
        logger.info(
            f"successfully modified {num_modified} file{'s' if num_modified > 1 else ''}."
        )
    else:
        logger.info("no files modified.")
Exemplo n.º 10
0
def adjust_for_docker() -> None:
    """
    Validate the Docker volume layout and change into the source directory.

    Inside Docker (but not in a GitHub Action) the old source directory must
    not exist and the current one must; otherwise SemgrepError is raised.
    Whenever SRC_DIRECTORY exists, it becomes the working directory so that
    all paths are relative to it.
    """
    if IN_DOCKER and not IN_GH_ACTION:
        problem = None
        if OLD_SRC_DIRECTORY.exists():
            problem = f"Detected Docker environment using old code volume, please use '{SRC_DIRECTORY}' instead of '{OLD_SRC_DIRECTORY}'"
        elif not SRC_DIRECTORY.exists():
            problem = f"Detected Docker environment without a code volume, please include '-v \"${{PWD}}:{SRC_DIRECTORY}\"'"
        if problem is not None:
            raise SemgrepError(problem)

    if SRC_DIRECTORY.exists():
        os.chdir(SRC_DIRECTORY)
Exemplo n.º 11
0
def run_spacegrep(patterns: List[Pattern], targets: List[Path]) -> dict:
    """
    Run spacegrep for every (pattern, target) pair and merge the results.

    Each invocation's parsed output contributes its "matches" (passed through
    ``_patch_id`` for the current pattern) and its "errors" to the combined
    result.

    Returns a dict with the accumulated "matches" and "errors" lists.
    Raises NotImplementedError for a pattern that is not a string, and
    SemgrepError when spacegrep exits with a non-zero status, when its output
    is not valid JSON, or when the output lacks an expected key.
    """
    matches: List[dict] = []
    errors: List[dict] = []
    for pattern in patterns:
        if not isinstance(pattern._pattern, str):
            raise NotImplementedError(
                f"Support for {type(pattern._pattern)} has not been implemented yet."
            )
        pattern_str = pattern._pattern  # TODO: Handle pattern Dict
        for target in targets:
            cmd = [
                SPACEGREP_PATH,
                "--output-format",
                "semgrep",
                "-d",
                str(target),
                pattern_str,
            ]
            try:
                p = sub_run(cmd,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
                p.check_returncode()
                raw_output = p.stdout

                output_json = _parse_spacegrep_output(raw_output)
                output_json["matches"] = _patch_id(
                    pattern, output_json.get("matches", []))

                matches.extend(output_json["matches"])
                errors.extend(output_json["errors"])
            except subprocess.CalledProcessError as e:
                # Take stderr from the exception rather than from `p`, which
                # would be unbound (or left over from an earlier iteration)
                # if sub_run itself raised. Decode leniently so undecodable
                # bytes cannot mask the real failure.
                spacegrep_error_text = (e.stderr or b"").decode(
                    "utf-8", errors="replace")
                raise SemgrepError(
                    f"Error running spacegrep on file {target}: Process error: {e}\n\nspacegrep error: {spacegrep_error_text}"
                ) from e
            except json.JSONDecodeError as e:
                raise SemgrepError(
                    f"Could not parse spacegrep output as JSON: JSON error: {e}"
                ) from e
            except KeyError as e:
                raise SemgrepError(
                    f"Invalid JSON output was received from spacegrep: {e}"
                ) from e

    return {
        "matches": matches,
        "errors": errors,
    }
Exemplo n.º 12
0
def parse_config_string(config_id: str, contents: str,
                        filename: Optional[str]) -> Dict[str, YamlTree]:
    """
    Parse YAML config text and return it keyed by ``config_id``.

    Raises SemgrepError with UNPARSEABLE_YAML_EXIT_CODE when ``contents`` is
    empty or is not valid YAML.
    """
    if not contents:
        raise SemgrepError(
            f"Empty configuration file {filename}",
            code=UNPARSEABLE_YAML_EXIT_CODE,
        )

    try:
        parsed = parse_yaml_preserve_spans(contents, filename)
    except YAMLError as se:
        raise SemgrepError(
            f"Invalid YAML file {config_id}:\n{indent(str(se))}",
            code=UNPARSEABLE_YAML_EXIT_CODE,
        )
    return {config_id: parsed}
Exemplo n.º 13
0
    def _run_core_command(
        self,
        patterns_json: List[Any],
        patterns: List[Pattern],
        targets: List[Path],
        language: Language,
        rule: Rule,
        rules_file_flag: str,
        cache_dir: str,
    ) -> dict:
        """
        Invoke semgrep-core for one rule and language and return its output.

        The patterns are dumped as YAML and the targets written one per line
        into temporary files that are handed to semgrep-core. If the rule has
        equivalences they are written to a third temporary file and passed
        along too.

        Returns the parsed semgrep-core output. On a non-zero exit code the
        parsed output is turned into an exception, or a SemgrepError is
        raised when it carries no "error" entry.
        """
        with tempfile.NamedTemporaryFile("w") as pattern_file, \
                tempfile.NamedTemporaryFile("w") as target_file, \
                tempfile.NamedTemporaryFile("w") as equiv_file:
            YAML().dump({"rules": patterns_json}, pattern_file)
            pattern_file.flush()

            target_listing = "\n".join(map(str, targets))
            target_file.write(target_listing)
            target_file.flush()

            cmd = [
                SEMGREP_PATH,
                "-lang",
                language,
                rules_file_flag,
                pattern_file.name,
                "-j",
                str(self._jobs),
                "-target_file",
                target_file.name,
                "-use_parsing_cache",
                cache_dir,
                "-timeout",
                str(self._timeout),
                "-max_memory",
                str(self._max_memory),
            ]

            rule_equivalences = rule.equivalences
            if rule_equivalences:
                self._write_equivalences_file(equiv_file, rule_equivalences)
                cmd += ["-equivalences", equiv_file.name]

            core_run = sub_run(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
            logger.debug(core_run.stderr.decode("utf-8", "replace"))

            if core_run.returncode == 0:
                return self._parse_core_output(core_run.stdout)

            # semgrep-core failed: its stdout should describe the error.
            failure_json = self._parse_core_output(core_run.stdout)
            if "error" not in failure_json:
                raise SemgrepError(
                    f"unexpected json output while invoking semgrep-core:\n{PLEASE_FILE_ISSUE_TEXT}"
                )
            self._raise_semgrep_error_from_json(failure_json, patterns)

            # Only reached if the call above returns instead of raising.
            return self._parse_core_output(core_run.stdout)
Exemplo n.º 14
0
def get_config(
    pattern: str, lang: str, config_strs: List[str]
) -> Tuple[Config, List[SemgrepError]]:
    """
    Build a Config from either an inline pattern or a list of config strings.

    A truthy ``pattern`` takes precedence and requires ``lang``; otherwise
    the config is built from ``config_strs``.

    Returns the config together with the errors collected while building it.
    Raises SemgrepError when a pattern is given without a language, or when
    the resulting config is falsy.
    """
    if not pattern:
        config, errors = Config.from_config_list(config_strs)
    elif lang:
        config, errors = Config.from_pattern_lang(pattern, lang)
    else:
        raise SemgrepError("language must be specified when a pattern is passed")

    if config:
        return config, errors

    raise SemgrepError(
        f"No config given and {DEFAULT_CONFIG_FILE} was not found. Try running with --help to debug or if you want to download a default config, try running with --config r2c"
    )
Exemplo n.º 15
0
 def _fail(
     self,
     reason: str,
     rule: Rule,
     core_run: subprocess.CompletedProcess,
     returncode: int,
     semgrep_output: str,
     semgrep_error_output: str,
 ) -> None:
     """
     Raise a SemgrepError describing a failed semgrep-core invocation.

     The message contains the failure reason, the rule id, the exit code,
     the command line that was run, and the captured stdout and stderr.
     This method always raises.
     """
     import shlex  # local import: only needed on this error path

     # Quote every argument so the reported command can be pasted into a
     # shell as-is; str() keeps non-string arguments from breaking the join.
     # Equivalent to shlex.join, which would require python >= 3.8.
     shell_command = " ".join(shlex.quote(str(arg)) for arg in core_run.args)
     raise SemgrepError(
         f"semgrep-core failed: {reason}\n"
         f"rule ID: '{rule.id}'\n"
         f"semgrep-core exit code: {returncode}\n"
         f"semgrep-core command: {shell_command}\n"
         "unexpected non-json output while invoking semgrep-core:\n"
         "--- semgrep-core stdout ---\n"
         f"{semgrep_output}"
         "--- end semgrep-core stdout ---\n"
         "--- semgrep-core stderr ---\n"
         f"{semgrep_error_output}"
         "--- end semgrep-core stderr ---\n"
         f"{PLEASE_FILE_ISSUE_TEXT}")
Exemplo n.º 16
0
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    # TODO: filter out obvious dangerous things here
    result = False

    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec(
            to_eval, scope
        )  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except KeyError as ex:
        logger.error(
            f"could not find metavariable {ex} while evaluating where-python expression '{where_expression}', consider case where metavariable is missing"
        )
    except Exception as ex:
        logger.error(
            f"received error '{repr(ex)}' while evaluating where-python expression '{where_expression}'"
        )

    if not isinstance(result, bool):
        raise SemgrepError(
            f"where-python expression '{where_expression}' needs boolean output but got {result}"
        )
    return result
Exemplo n.º 17
0
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    # TODO: filter out obvious dangerous things here
    result = False

    local_vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": local_vars}
        # fmt: off
        exec(
            to_eval, scope
        )  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[RETURN_VAR]  # type: ignore
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )

    if not isinstance(result, bool):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {result} for {where_expression}"
        )
    return result
Exemplo n.º 18
0
 def into_semgrep_error(self) -> SemgrepError:
     """
     Convert this core error into the matching SemgrepError instance.

     Timeout, OutOfMemory and LexicalError check ids map to their dedicated
     error classes. Anything else is reported as a SourceParseError pointing
     at the offending span, or as a plain SemgrepError when the source file
     cannot be opened.
     """
     simple_error_classes = {
         "Timeout": MatchTimeoutError,
         "OutOfMemory": OutOfMemoryError,
         "LexicalError": LexicalError,
     }
     error_cls = simple_error_classes.get(self._check_id)
     if error_cls is not None:
         return error_cls(self._path, self._rule_id)

     # Everything else is treated as a parse error; register the file
     # contents so the span can refer to them by hash.
     try:
         with open(self._path, errors="replace") as source_file:
             source_hash = SourceTracker.add_source(source_file.read())
     except OSError as e:
         return SemgrepError(f"Could not open '{self._path}': {e}")

     failing_span = Span(
         start=self._start,
         end=self._end,
         source_hash=source_hash,
         file=str(self._path),
     )
     return SourceParseError(
         short_msg="parse error",
         long_msg=f"Could not parse {self._path.name} as {self._language}",
         spans=[failing_span],
         help="If the code appears to be valid, this may be a semgrep bug.",
     )
Exemplo n.º 19
0
    def close(self) -> None:
        """
        Close the output handler.

        Writes any output that has not been written so far (to stdout and,
        if configured, to the output destination), then selects the final
        error and hands it to ``final_raise`` together with optional error
        statistics.
        """
        if self.has_output:
            to_terminal = (self.settings.output_destination is None
                           and self.stdout.isatty())
            output = self.build_output(to_terminal)
            if output:
                print(output, file=self.stdout)
            if self.stats_line:
                logger.info(self.stats_line)

            if self.settings.output_destination:
                self.save_output(self.settings.output_destination, output)

        # Precedence: an explicit final error, then findings (when configured
        # to fail on them), then structured errors from the run.
        if self.final_error:
            outcome = (self.final_error, None)
        elif self.rule_matches and self.settings.error_on_findings:
            # This exception won't be visible to the user, we're just
            # using this to return a specific error code
            outcome = (SemgrepError("", code=FINDINGS_EXIT_CODE), None)
        elif self.semgrep_structured_errors:
            # make a simplifying assumption that # errors = # files failed
            # it's a quite a bit of work to simplify further because errors may or may not have path, span, etc.
            failed_count = len(self.semgrep_structured_errors)
            outcome = (
                self.semgrep_structured_errors[-1],
                f"{failed_count} files could not be analyzed",
            )
        else:
            outcome = (None, None)
        self.final_raise(*outcome)
Exemplo n.º 20
0
def _sarif_notification_from_error(error: SemgrepError) -> Dict[str, Any]:
    """
    Build a SARIF notification dict from a SemgrepError.

    The descriptor id is the error's "type", the level is mapped from the
    error's "level", and the message text is the first of "message",
    "long_msg" and "short_msg" that is not None (an empty string when
    "short_msg" is absent).
    """
    error_dict = error.to_dict()
    descriptor_id = error_dict["type"]

    sarif_level_by_error_level = {
        Level.ERROR.name.lower(): "error",
        Level.WARN.name.lower(): "warning",
    }
    sarif_level = sarif_level_by_error_level[error_dict["level"]]

    text = error_dict.get("message")
    if text is None:
        long_msg = error_dict.get("long_msg")
        text = long_msg if long_msg is not None else error_dict.get("short_msg", "")

    descriptor = {"id": descriptor_id}
    message = {"text": text}
    return {
        "descriptor": descriptor,
        "message": message,
        "level": sarif_level,
    }
Exemplo n.º 21
0
    def handle_regex_patterns(
        self,
        outputs: List[PatternMatch],
        patterns_regex: List[Any],
        targets: List[Path],
    ) -> None:
        """
        Run the regex patterns over every target and append the matches.

        Each pattern is compiled once up front; the per-file matches are then
        computed either sequentially (when testing) or in a process pool, and
        appended to ``outputs`` in target order.

        Raises SemgrepError when a pattern is not a valid regular expression.
        """
        patterns_json = [pattern.to_json() for pattern in patterns_regex]

        compiled_patterns = []
        try:
            for entry in patterns_json:
                compiled_patterns.append(
                    (entry["id"], re.compile(entry["pattern"])))
        except re.error as err:
            raise SemgrepError(f"invalid regular expression specified: {err}")

        if not self._testing:
            match_target = functools.partial(get_re_matches, compiled_patterns)
            with multiprocessing.Pool(self._jobs) as pool:
                matches_per_file = pool.map(match_target, targets)
        else:
            # Testing functionality runs in a multiprocessing.Pool. We cannot run
            # a Pool inside a Pool, so we have to avoid multiprocessing when testing.
            # https://stackoverflow.com/questions/6974695/python-process-pool-non-daemonic
            matches_per_file = []
            for target in targets:
                matches_per_file.append(
                    get_re_matches(compiled_patterns, target))

        for file_matches in matches_per_file:
            outputs.extend(file_matches)
Exemplo n.º 22
0
def compare_where_python(where_expression: str,
                         pattern_match: PatternMatch) -> bool:
    """
    Evaluate a where-python expression against a pattern match.

    The expression is executed with ``vars`` mapping each metavariable of
    ``pattern_match`` to its value; the value of its last line is the
    result. If evaluation fails the error is logged and the result stays
    False.

    Raises SemgrepError when the expression yields a non-boolean value.
    """
    return_var = "semgrep_pattern_return"
    result = False

    # Turn the last line into an assignment so its value can be read back
    # out of the execution scope.
    *leading_lines, final_line = where_expression.strip().split("\n")
    to_eval = "\n".join(leading_lines + [f"{return_var} = {final_line}"])

    metavar_values = {}
    for metavar in pattern_match.metavariables:
        metavar_values[metavar] = pattern_match.get_metavariable_value(metavar)
    scope = {"vars": metavar_values}

    try:
        # fmt: off
        exec(to_eval, scope)  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        result = scope[return_var]  # type: ignore
    except KeyError as ex:
        logger.error(
            f"could not find metavariable {ex} while evaluating where-python expression '{where_expression}', consider case where metavariable is missing"
        )
    except Exception as ex:
        logger.error(
            f"received error '{repr(ex)}' while evaluating where-python expression '{where_expression}'"
        )

    if isinstance(result, bool):
        return result
    raise SemgrepError(
        f"where-python expression '{where_expression}' needs boolean output but got {result}"
    )
Exemplo n.º 23
0
def _where_python_statement_matches(where_expression: str,
                                    metavars: Dict[str, Any]) -> bool:
    # TODO: filter out obvious dangerous things here
    output_var = None

    # HACK: we're executing arbitrary Python in the where-python,
    # be careful my friend
    vars = {k: v["abstract_content"] for k, v in metavars.items()}
    RETURN_VAR = "semgrep_pattern_return"
    try:
        cleaned_where_expression = where_expression.strip()
        lines = cleaned_where_expression.split("\n")
        new_last_line = f"{RETURN_VAR} = {lines[-1]}"
        lines[-1] = new_last_line
        to_eval = "\n".join(lines)
        scope = {"vars": vars}
        # fmt: off
        exec(
            to_eval, scope
        )  # nosem: contrib.dlint.dlint-equivalent.insecure-exec-use, python.lang.security.audit.exec-detected.exec-detected
        # fmt: on
        output_var = scope[RETURN_VAR]
    except Exception as ex:
        print_stderr(
            f"error evaluating a where-python expression: `{where_expression}`: {ex}"
        )

    if type(output_var) != type(True):
        raise SemgrepError(
            f"python where expression needs boolean output but got: {output_var} for {where_expression}"
        )
    return output_var == True
Exemplo n.º 24
0
    def close(self) -> None:
        """
        Close the output handler.

        Any pending output is built, printed to ``self.stdout`` when
        non-empty, and additionally saved when an output destination is
        configured. The error to finish with (or ``None``) is then handed
        to ``self.final_raise``.
        """
        if self.has_output:
            # The flag passed to build_output is true only when nothing is
            # being saved elsewhere and stdout is a terminal.
            to_terminal = (self.settings.output_destination is None
                           and self.stdout.isatty())
            output = self.build_output(to_terminal)
            if output:
                print(output, file=self.stdout)

            destination = self.settings.output_destination
            if destination:
                self.save_output(destination, output)

        # Precedence: explicit final error, then findings, then the most
        # recent structured error.
        if self.final_error:
            outcome = self.final_error
        elif self.rule_matches and self.settings.error_on_findings:
            # This exception won't be visible to the user, we're just
            # using this to return a specific error code
            outcome = SemgrepError("", code=FINDINGS_EXIT_CODE)
        elif self.semgrep_structured_errors:
            outcome = self.semgrep_structured_errors[-1]
        else:
            outcome = None
        self.final_raise(outcome)
Exemplo n.º 25
0
    def _raise_semgrep_error_from_json(
        self,
        error_json: Dict[str, Any],
        patterns: List[Pattern],
        rule: Rule,
    ) -> None:
        """
        Turn a semgrep-core JSON error into the corresponding exception.

        This method always raises; which exception depends on
        ``error_json["error"]``.

        See format_output_exception in semgrep O'Caml for details on schema
        """
        error_type = error_json["error"]

        if error_type == "invalid language":
            raise SemgrepError(
                f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
            )

        if error_type == "invalid regexp in rule":
            raise SemgrepError(
                f'Invalid regexp in rule: {error_json["message"]}')

        if error_type != "invalid pattern":
            # no special formatting ought to be required for the other types; the semgrep python should be performing
            # validation for them. So if any other type of error occurs, ask the user to file an issue
            raise SemgrepError(
                f"an internal error occured while invoking semgrep-core while running rule '{rule.id}'. Consider skipping this rule and reporting this issue.\n\t{error_type}: {error_json.get('message', 'no message')}\n{PLEASE_FILE_ISSUE_TEXT}"
            )

        # From here on the error is an invalid pattern.
        if self._optimizations == "all":
            # This mode reports the pattern without a source span.
            raise InvalidPatternErrorNoSpan(
                rule_id=error_json.get("pattern_id", "<no rule_id>"),
                pattern=error_json.get("pattern", "<no pattern>"),
                language=error_json.get("language", "<no language>"),
            )

        # Take the span of the first pattern whose id matches the error.
        matching_span = None
        for candidate in patterns:
            if candidate._id == error_json["pattern_id"]:
                matching_span = candidate.span
                break

        if matching_span is None:
            raise SemgrepError(
                f"Pattern id from semgrep-core was missing in pattern spans. {PLEASE_FILE_ISSUE_TEXT}"
            )

        raise InvalidPatternError(
            short_msg=error_type,
            long_msg=
            f"Pattern could not be parsed as a {error_json['language']} semgrep pattern",
            spans=[matching_span],
            help=None,
        )
Exemplo n.º 26
0
    def post_output(cls, output_url: str, output: str) -> None:
        """
        POST ``output`` as the request body to ``output_url``.

        The request uses a 10 second timeout.

        :raises SemgrepError: if the request times out
        """
        import requests  # here for faster startup times

        logger.info(f"posting to {output_url}...")
        try:
            response = requests.post(output_url, data=output, timeout=10)
        except requests.exceptions.Timeout:
            raise SemgrepError(f"posting output to {output_url} timed out")
        else:
            logger.debug(f"posted to {output_url} and got status_code:{response.status_code}")
Exemplo n.º 27
0
 def _parse_core_output(self, core_run_out: bytes) -> Dict[str, Any]:
     # see if semgrep output a JSON error that we can decode
     semgrep_output = core_run_out.decode("utf-8", "replace")
     try:
         return cast(Dict[str, Any], json.loads(semgrep_output))
     except ValueError:
         raise SemgrepError(
             f"unexpected non-json output while invoking semgrep-core:\n{semgrep_output}\n\n{PLEASE_FILE_ISSUE_TEXT}"
         )
Exemplo n.º 28
0
def synthesize(language: str, code_to_synthesize: str,
               targets_str: Sequence[str]) -> None:
    """
    Run semgrep-core's ``-synthesize_patterns`` on one target and print the result.

    :param language: accepted for interface compatibility; not used here
    :param code_to_synthesize: passed to semgrep-core as the
        ``-synthesize_patterns`` argument
    :param targets_str: target paths; must resolve to exactly one target
    :raises SemgrepError: if the targets do not resolve to exactly one
        file, or if the semgrep-core invocation fails
    """
    targets = semgrep.config_resolver.resolve_targets(targets_str)
    if len(targets) != 1:
        raise SemgrepError(
            "--synthesize-patterns requires exactly one target file")

    target_path = str(targets[0])
    cmd = [
        SemgrepCore.path(),
        "-synthesize_patterns",
        code_to_synthesize,
        target_path,
    ]

    try:
        output = sub_check_output(cmd)
    except subprocess.CalledProcessError as ex:
        raise SemgrepError(
            f"error invoking semgrep with:\n\t{' '.join(cmd)}\n\t{ex}\n{PLEASE_FILE_ISSUE_TEXT}"
        )
    else:
        # Undecodable bytes are replaced so printing cannot fail on them.
        print(output.decode(errors="replace"))
Exemplo n.º 29
0
    def _raise_semgrep_error_from_json(
        self,
        error_json: Dict[str, Any],
        rule: Rule,
    ) -> None:
        """
        Turn a semgrep-core JSON error into the corresponding exception.

        This method always raises; which exception depends on
        ``error_json["error"]``.

        See format_output_exception in semgrep O'Caml for details on schema
        """
        error_type = error_json["error"]

        if error_type == "invalid language":
            raise SemgrepError(
                f'{error_json["language"]} was accepted by semgrep but rejected by semgrep-core. {PLEASE_FILE_ISSUE_TEXT}'
            )

        if error_type == "invalid regexp in rule":
            raise SemgrepError(
                f'Invalid regexp in rule: {error_json["message"]}')

        if error_type != "invalid pattern":
            # no special formatting ought to be required for the other types; the semgrep python should be performing
            # validation for them. So if any other type of error occurs, ask the user to file an issue
            raise SemgrepError(
                f"an internal error occured while invoking semgrep-core while running rule '{rule.id}'. Consider skipping this rule and reporting this issue.\n\t{error_type}: {error_json.get('message', 'no message')}\n{PLEASE_FILE_ISSUE_TEXT}"
            )

        # From here on the error is an invalid pattern: build a span from
        # the location semgrep-core reported.
        error_range = error_json["range"]
        pattern_text = error_json.get("pattern", "<no pattern>")
        matching_span = Span.from_string_token(
            s=pattern_text,
            line=error_range.get("line", 0),
            col=error_range.get("col", 0),
            path=error_range.get("path", []),
            filename="semgrep temp file",
        )

        # A bare parse error gets a message quoting the pattern; any other
        # message from semgrep-core is passed through.
        long_msg = (
            f"Pattern `{pattern_text.strip()}` could not be parsed as a {error_json['language']} semgrep pattern"
            if error_json["message"] == "Parsing.Parse_error" else
            f"Error parsing {error_json['language']} pattern: {error_json['message']}"
        )

        raise InvalidPatternError(
            short_msg=error_type,
            long_msg=long_msg,
            spans=[matching_span],
            help=None,
        )
Exemplo n.º 30
0
def load_config_from_local_path(location: str) -> Dict[str, YamlTree]:
    """
    Return config file(s) as dictionary object

    ``location`` is joined onto the base path. A file is parsed on its own
    and a directory is parsed as a folder of configs.

    :raises SemgrepError: if the path does not exist, or exists but is
        neither a file nor a directory
    """
    loc = get_base_path().joinpath(location)

    if not loc.exists():
        addendum = (
            " (since you are running in docker, you cannot specify arbitrary paths on the host; they must be mounted into the container)"
            if IN_DOCKER else ""
        )
        raise SemgrepError(
            f"unable to find a config; path `{loc}` does not exist{addendum}")

    if loc.is_file():
        return parse_config_at_path(loc)
    if loc.is_dir():
        return parse_config_folder(loc)

    raise SemgrepError(
        f"config location `{loc}` is not a file or folder!")