def __init__(self, runner, grammar, log_test=False, log_reduce=False,
             timeout=None):
    """Set up the reducer's parser, options, and per-run state.

    `runner` is stored unchanged for later use.
    `grammar` is handed to `EarleyParser`; the grammar and start symbol
    kept on the instance are the ones reported back by that parser.
    `log_test` and `log_reduce` are stored as logging switches.
    `timeout` is stored unchanged (default: None).
    """
    self.runner = runner

    # Build the parser once and read grammar/start symbol back from it.
    earley = EarleyParser(grammar)
    self.parser = earley
    self.grammar = earley.grammar()
    self.start_symbol = earley.start_symbol()

    self.log_test, self.log_reduce = log_test, log_reduce
    self.try_all_combinations = False
    self.timeout = timeout

    # NOTE(review): reset() is defined outside this block; presumably it
    # initializes the remaining per-run state -- confirm against the class.
    self.reset()
def __init__(self, grammar: Grammar,
             generalizer_class: Type = TreeGeneralizer,
             parser: Optional[Parser] = None,
             **kwargs: Any) -> None:
    """Constructor.

    `grammar` - the input grammar, in fuzzingbook format; it must pass
        `is_valid_grammar()`.
    `generalizer_class` - the tree generalizer class to use
        (default: `TreeGeneralizer`).
    `parser` - the parser to use (default: `EarleyParser(grammar)`).

    Any further keyword arguments are kept and passed on to the tree
    generalizer, notably:
    `fuzzer` - the fuzzer to use (default: `GrammarFuzzer`), and
    `log` - enables debugging output if True.
    """
    super().__init__()

    self.grammar = grammar
    assert is_valid_grammar(grammar)

    self.generalizer_class = generalizer_class
    # Fall back to an Earley parser over `grammar` when none was supplied.
    self.parser = EarleyParser(grammar) if parser is None else parser
    self.kwargs = kwargs

    # State preserved across successive fuzz() calls
    self.generalized_args: Dict[str, Any] = {}
    self.generalized_trees: Dict[str, DerivationTree] = {}
    self.generalizers: Dict[str, TreeGeneralizer] = {}
class MyGrammarReducer(object):
    """Grammar-based input reducer driven by an external program.

    An input is parsed into a derivation tree `(symbol, children)`.
    Subtrees are then repeatedly replaced by smaller candidates; a
    replacement is kept whenever the external `runner` still fails
    (exits with a non-zero status) on the resulting input.

    Candidate inputs are written to `./tmp/output<N>` and fed to the
    runner on standard input; the runner's output is discarded.
    """

    def __init__(self, runner, grammar, log_test=False, log_reduce=False,
                 timeout=None):
        """Create a reducer.

        `runner` - the program to execute; run as `[runner]` with the
            candidate input on stdin.
        `grammar` - the grammar handed to `EarleyParser`.
        `log_test` - if true, print one line per executed/cached test.
        `log_reduce` - if true, print the reduction steps.
        `timeout` - passed as `timeout` to `subprocess.run` (None: no limit).
        """
        self.runner = runner
        self.parser = EarleyParser(grammar)
        self.grammar = self.parser.grammar()
        self.start_symbol = self.parser.start_symbol()
        self.log_test = log_test
        self.log_reduce = log_reduce
        self.try_all_combinations = False
        self.timeout = timeout
        # Sink for the runner's stdout/stderr. This is a subprocess
        # constant, so there is no file handle to open or close.
        self.devnull = subprocess.DEVNULL
        self.reset()

    def reset(self):
        """Forget all test results and restart the test counter."""
        self.tests = 0
        self.cache = {}

    def execution(self, inp):
        """Run the runner on the current candidate file; return True if it passes.

        The result for `inp` is looked up in the cache first. Otherwise
        the runner reads `./tmp/output<self.tests>` on stdin (the caller
        must have written that file). A non-zero exit status yields
        False; a clean exit or a timeout yields True. The result is
        cached under `inp` and the test counter is incremented.

        Errors other than a timeout or non-zero exit (e.g. a missing
        candidate file or runner) propagate unchanged and leave the
        cache and counter untouched.
        """
        if inp in self.cache:
            if self.log_test:
                print("cache :)")
            return self.cache[inp]

        candidate_path = "./tmp/output" + str(self.tests)
        with open(candidate_path, "rb") as candidate_file:
            try:
                subprocess.run([self.runner],
                               stdin=candidate_file,
                               stdout=self.devnull,
                               stderr=self.devnull,
                               timeout=self.timeout,
                               check=True)
            except subprocess.TimeoutExpired:
                if self.log_test:
                    print("timeout...")
                # A timeout is not counted as a failure.
                passed = True
            except subprocess.CalledProcessError:
                if self.log_test:
                    print("crash!!")
                passed = False
            else:
                passed = True

        self.cache[inp] = passed
        self.tests += 1
        return passed

    def subtrees_with_symbol(self, tree, symbol, depth=-1, ignore_root=True):
        """Return all subtrees of `tree` whose root is `symbol`.

        With a negative `depth` (the default), subtrees at any level are
        returned. With a positive `depth`, only subtrees exactly `depth`
        levels below `tree` are returned. The root itself is only
        considered if `ignore_root` is false.
        """
        result = []
        (child_symbol, children) = tree
        if depth <= 0 and not ignore_root and child_symbol == symbol:
            result.append(tree)

        # depth == 0 means the requested level has been reached.
        if depth != 0 and children is not None:
            for c in children:
                result += self.subtrees_with_symbol(c, symbol,
                                                    depth=depth - 1,
                                                    ignore_root=False)
        return result

    def possible_combinations(self, list_of_lists):
        """Return every way of picking one element from each list, in order.

        `[[1, 2], [3]]` yields `[[1, 3], [2, 3]]`. An empty
        `list_of_lists`, or any empty inner list, yields `[]`.
        """
        if not list_of_lists:
            return []

        head, rest = list_of_lists[0], list_of_lists[1:]
        if not rest:
            return [[e] for e in head]

        # The tail combinations do not depend on the head element:
        # compute them once instead of once per element.
        tails = self.possible_combinations(rest)
        return [[e] + tail for e in head for tail in tails]

    def number_of_nodes(self, tree):
        """Return the number of nodes in `tree`, including its root.

        A node whose children are `None` counts as a single node.
        """
        (symbol, children) = tree
        return 1 + sum(self.number_of_nodes(c) for c in children or ())

    def alternate_reductions(self, tree, symbol, depth=-1):
        """Return smaller trees for `symbol` built from other expansions.

        For each expansion of `symbol` in the grammar (shortest first),
        the expansion's children are filled with matching subtrees of
        `tree` (see `subtrees_with_symbol`). Only results with fewer
        nodes than `tree` are kept; unless `try_all_combinations` is
        set, only the first such result per expansion is used. The
        result is sorted by number of nodes.
        """
        reductions = []
        tree_size = self.number_of_nodes(tree)

        # sorted() rather than .sort(): leave the grammar itself untouched.
        expansions = sorted(
            self.grammar.get(symbol, []),
            key=lambda expansion: len(expansion_to_children(expansion)))

        for expansion in expansions:
            candidates_per_child = []
            for (alt_symbol, _) in expansion_to_children(expansion):
                candidates = self.subtrees_with_symbol(tree, alt_symbol,
                                                       depth=depth)
                if not candidates:
                    # This expansion cannot be filled; try the next one.
                    break
                candidates_per_child.append(candidates)
            else:
                for new_children in self.possible_combinations(
                        candidates_per_child):
                    new_tree = (symbol, new_children)
                    if self.number_of_nodes(new_tree) < tree_size:
                        reductions.append(new_tree)
                        if not self.try_all_combinations:
                            break

        reductions.sort(key=self.number_of_nodes)
        return reductions

    def symbol_reductions(self, tree, symbol, depth=-1):
        """Return all candidate replacements for `tree`, without duplicates.

        Candidates are the subtrees of `tree` rooted at `symbol`,
        followed by the results of `alternate_reductions`.
        """
        reductions = (self.subtrees_with_symbol(tree, symbol, depth=depth)
                      + self.alternate_reductions(tree, symbol, depth=depth))

        # Trees contain lists and are not hashable, hence the linear scan.
        unique_reductions = []
        for r in reductions:
            if r not in unique_reductions:
                unique_reductions.append(r)
        return unique_reductions

    def reduce_subtree(self, tree, subtree, depth=-1):
        """Reduce `subtree` (a part of `tree`) in place; return True if reduced.

        Starts from an empty `./tmp` directory, which receives the
        candidate inputs.
        """
        self._prepare_workdir()
        return self._reduce_subtree(tree, subtree, depth)

    def _prepare_workdir(self):
        """Remove any existing `./tmp` directory and create a fresh one."""
        import shutil  # only needed here

        shutil.rmtree("./tmp", ignore_errors=True)
        os.makedirs("./tmp", exist_ok=True)

    def _write_candidate(self, candidate):
        """Write `candidate` to the file the next `execution` call reads."""
        with open("./tmp/output" + str(self.tests), "wb") as output:
            output.write(candidate.encode())

    def _reduce_subtree(self, tree, subtree, depth):
        """Recursive part of `reduce_subtree`; does not touch `./tmp` setup."""
        symbol, children = subtree
        if not children:
            return False

        if self.log_reduce:
            print("Reducing", all_terminals(subtree), "with depth", depth)

        reduced = False
        while True:
            reduced_child = False
            for i, child in enumerate(children):
                (child_symbol, _) = child
                child_size = self.number_of_nodes(child)
                for reduction in self.symbol_reductions(
                        child, child_symbol, depth):
                    if self.number_of_nodes(reduction) >= child_size:
                        continue

                    # Try this reduction
                    if self.log_reduce:
                        print("Replacing", all_terminals(children[i]),
                              "by", all_terminals(reduction))
                    children[i] = reduction

                    candidate = all_terminals(tree)
                    if self.log_test:
                        print("Test" + str(self.tests) + " " + candidate)
                    self._write_candidate(candidate)

                    if not self.execution(candidate):
                        # Still failing: keep the smaller child.
                        if self.log_reduce:
                            print("New tree:", all_terminals(tree))
                        reduced = reduced_child = True
                        break

                    # Didn't work out - restore
                    children[i] = child

            if not reduced_child:
                if self.log_reduce:
                    print("Tried all alternatives for",
                          all_terminals(subtree))
                break

        # Run recursively on the (possibly replaced) children
        for c in children:
            if self._reduce_subtree(tree, c, depth):
                reduced = True

        return reduced

    def reduce_tree(self, tree):
        """Reduce the whole `tree` in place; return True if it was reduced."""
        return self.reduce_subtree(tree, tree)

    def parse(self, inp):
        """Parse `inp` and return the first derivation tree the parser yields."""
        tree, *_ = self.parser.parse(inp)
        if self.log_reduce:
            print(all_terminals(tree))
        return tree

    def reduce(self, inp):
        """Parse `inp`, reduce its tree, and return the reduced input string."""
        tree = self.parse(inp)
        self.reduce_tree(tree)
        return all_terminals(tree)
import pprint

if __name__ == '__main__':
    # Pretty-print the fuzzer's derivation tree, at most 7 levels deep.
    pp = pprint.PrettyPrinter(depth=7)
    pp.pprint(simple_html_fuzzer.derivation_tree)

    ## Parsing
    ## -------
    print('\n## Parsing')

from fuzzingbook.Parser import Parser, EarleyParser  # minor dependency

if __name__ == '__main__':
    # Parse the failing input and keep the first derivation tree produced.
    simple_html_parser = EarleyParser(SIMPLE_HTML_GRAMMAR)
    bad_input_tree = list(simple_html_parser.parse(BAD_INPUT))[0]
    display_tree(bad_input_tree)

from fuzzingbook.GrammarFuzzer import tree_to_string, all_terminals

if __name__ == '__main__':
    # The bare call's result is discarded when run as a script
    # (presumably kept for notebook-style display -- confirm).
    tree_to_string(bad_input_tree)

    # Converting the tree back to a string must reproduce the input.
    assert tree_to_string(bad_input_tree) == BAD_INPUT