def _process(objs: List[Dict[str, Any]], inpt: str, dfs: bool = False) -> Tuple[List[InputValue], bool]:
    """
    Returns the next position to substitute and the possible substitutions based on a list of comparisons.
    :param dfs: True if the corrections should be constructed for a depth-first search.
    :param objs: The list of comparisons made during the execution.
    :param inpt: The input used for the execution.
    :return: the list of possible replacements and a flag indicating whether the last comparison was an assert
    """
    # the sole input comparisons for those functions that need it
    input_comparisons = [obj for obj in objs if obj["type"] == "INPUT_COMPARISON"]
    if not objs:
        # FIXME return proper list of corrections
        return [], False  # [InputValue(0, 0, random.choice(string.printable), "undef", inpt)]

    # get the last comparison made, then extract the character index.
    last_comparison = _get_last_comparison(objs)
    if not last_comparison:
        # raise ValueError("The extraction of the last made comparison returned incorrectly.")
        return [], False
        # return _construct_correction_list([random.choice(Utils.continuations)], dfs, inpt, Utils.min_index + len(inpt) - 1)

    # input_len = last_comparison["length"]
    Utils.max_index = last_comparison['index'][0]

    # if we can prune this branch of the search space, we do so
    # if Pruning.is_pruned(objs, input_comparisons, Utils.max_index):
    #     return []

    # if input_len <= Utils.max_index:  # EOF -- generate more data
    #     # FIXME return proper list of corrections
    #     return [InputValue(Utils.max_index, Utils.min_index, random.choice(string.printable), "undef", inpt)]

    # First, get all token links, then get all the comparisons on the index of the last character comparison
    cmps = _get_comparisons_on_idx(input_comparisons, Utils.max_index)
    ParsingStageExtractor.extract_stages(objs)
    TokenLearningHandler.find_learning_patterns(objs)
    TokenHandler.iterate_objs(input_comparisons, inpt)

    corrections = _get_corrections(cmps, Utils.continuations)
    last_obj_entry = objs[-1]
    return (_construct_correction_list(corrections, dfs, inpt, Utils.max_index),
            last_obj_entry["type"] == "INPUT_COMPARISON"
            and last_obj_entry["operator"] == "assert")
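# Illustrative sketch (hypothetical trace entry, not taken from a real run): given a
# trace such as
#   objs = [{"type": "INPUT_COMPARISON", "index": [2], "operator": "strcmp",
#            "operand": ["("], "value": "c"}]
# _process(objs, "abc") would set Utils.max_index to 2 (the index of the last
# comparison), collect all comparisons made on that index, and return the corrections
# derived from them together with a flag telling whether the trace ended in an
# "assert" comparison.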
def get_corrections(self):
    """
    Returns the new input string based on parent and other encapsulated parameters.
    :return: a tuple of the corrected input and the corrected input with an additional
        continuation appended (both are identical if do_append is False)
    """
    new_char = self.correction
    subst_index = self.at - self.min
    inp = self.inp
    # replace char of last comparison with new continuation
    inp = inp[:subst_index] + new_char
    # append a new char, it might be that the program expects additional input
    # also if the new char is not a char but a string it is likely a keyword, so we better add a whitespace
    if self.operator == "strcmp":
        inp_rand_new = inp + " " + random.choice(Utils.continuations)
    # for token compares the "random" next char has to be a token itself, otherwise a lexing error might occur
    # and we will not see a token comparison. Also between two tokens a whitespace should be allowed.
    elif self.operator == "tokencomp":
        # for token substitutions add a whitespace as this is in general used to separate tokens
        inp = self.inp[:subst_index] + " " + new_char if not self.inp[:subst_index].endswith(" ") else self.inp[:subst_index] + new_char
        inp_rand_new = inp + " " + TokenHandler.random_token() if not inp.endswith(" ") else inp + new_char
    elif self.operator == "strlen":
        inp_rand_new = inp
    else:
        inp_rand_new = inp + random.choice(Utils.continuations)

    if self.do_append:
        return inp, inp_rand_new
    else:
        return inp, inp
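# Worked example (hypothetical values, assuming an object with the attributes used above):
#   inp = "a-c", at = 1, min = 0, correction = "+", operator = "strcmp", do_append = True
#   subst_index = 1 - 0 = 1, so inp becomes "a" + "+" = "a+" (the old tail "-c" is dropped)
#   inp_rand_new = "a+" + " " + <random continuation>, e.g. "a+ x"
#   the method then returns ("a+", "a+ x"); with do_append = False it would return ("a+", "a+").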
def print_stats_file():
    """
    Report stats in the stats file.
    """
    Utils.stats_file.seek(0)
    Utils.stats_file.write("String to Token Mapping:\n")
    Utils.stats_file.write(TokenHandler.print_tokenmap())
    Utils.stats_file.write("\nParsing Stage Mapping:\n")
    Utils.stats_file.write(ParsingStageExtractor.print_stages())
    Utils.stats_file.write("\n")
    Utils.stats_file.write(
        "Smallest in Heap: %s\n\tValue: %s\n\tStack: %d\n" %
        (str(Utils.inputs.heap[0].prio_value), Utils.inputs.heap[0].val,
         Utils.inputs.heap[0].val.stack_size)
        if Utils.inputs.heap else "PrioQueue is currently empty.\n")
    Utils.stats_file.write("\n")
    Utils.stats_file.write(
        "Number of executions: %d\nApprox. Search Space Size: %d\n"
        "Executed since last found: %d\nRuntime (seconds): %d\n" %
        (Utils.all_exec, len(Utils.inputs), Utils.current_iteration,
         time.time() - Utils.starttime))
    Utils.stats_file.flush()
def print_afl():
    """
    Prints the dictionary and seeds to the afl folder.
    Mock values are written as test and dict input if nothing was created.
    :return:
    """
    os.makedirs(os.path.join(g_parentdir, "afl", "dict"), exist_ok=True)
    os.makedirs(os.path.join(g_parentdir, "afl", "tests"), exist_ok=True)

    dict_counter = 0
    tokens = TokenHandler.tokens()
    if not tokens:
        with open(os.path.join(g_parentdir, "afl", "dict", "entry%d" % dict_counter), "w") as afl_dict:
            afl_dict.write(" ")
        dict_counter += 1
    for entry in tokens:
        with open(os.path.join(g_parentdir, "afl", "dict", "entry%d" % dict_counter), "w") as afl_dict:
            afl_dict.write(entry)
        dict_counter += 1

    # create an empty test if no seeds are available to fulfill the requirement of afl to have at least one valid test
    test_counter = 0
    if not Utils.seed_for_afl:
        with open(os.path.join(g_parentdir, "afl", "tests", "test%d" % test_counter), "w") as test_file:
            test_file.write(" ")
        test_counter += 1
    for val in Utils.seed_for_afl:
        with open(os.path.join(g_parentdir, "afl", "tests", "test%d" % test_counter), "w") as test_file:
            # avoid inputs that cause the afl instrumented version of lisp to crash
            if not val.startswith(" ( # "):
                test_file.write(val)
        test_counter += 1
def _add_values_to_queue(h_value, inpt_counter, new_covered, values,
                         parent_input: "InputWrapper",
                         re_evaluation: bool = False):
    """
    The actual addition code.
    :param h_value:
    :param inpt_counter:
    :param new_covered:
    :param values:
    :return:
    """
    if h_value.cover_counter[0] >= 0:
        for val in values:
            # delete possible random continuations which are possibly part of the program input
            if val.correction in Utils.continuations:
                Utils.continuations.remove(val.correction)
                # TODO for the moment refill continuations if it runs empty, in future use a wider range of cont.
                if not Utils.continuations:
                    Utils.continuations = [i for i in string.printable]
            inpt_counter += 1
            # add for each substitution an element to the prio queue which will be inserted based on the
            # heuristic value of the substitution
            new_h_value: HeuristicValue = h_value.clone()
            new_h_value.set_input_counter_adapt_value(
                parent_input.prio_value.input_counter + 1 if parent_input else 0)
            # adjustment of the heuristic value based on the operator used and the stacksize
            # operator_adjustment = 10000 - (val.stack_size * 100) if val.operator == "tokencomp" and val.stack_size > 0 and h_value.cover_counter[0] > 0 else 0
            # adjustment of the heuristic value based on the token used
            # only used if a certain amount of new basic blocks is covered
            token_usage_adjustement = 100 * len(val.correction) \
                if val.operator == "tokencomp" \
                and h_value.cover_counter[0] <= individual_correction_threshold \
                and not any(val.correction in value for value in Utils.valid_found) else 0
            if val.operator != "tokencomp":
                new_h_value.adjust_value(len(val.correction) * 2)
            else:
                new_h_value.adjust_value(1 + token_usage_adjustement)
            if val.operator == "tokencomp" and val.stack_size > 0 \
                    and h_value.cover_counter[0] > individual_correction_threshold \
                    and not re_evaluation and val.do_append:
                new_h_value.set_individual_correction(
                    TokenHandler.get_tokencompare_stack_tuple(
                        val.at, val.correction, val.stack_size, val.id,
                        h_value.cover_counter[0]))
            # if we want the value to be re-evaluated (the do_append flag is False), we have to give it a very good heuristic value
            elif not val.do_append:
                new_h_value.set_individual_correction(((-1, 0),))
                new_h_value.same_path_taken = 0.0
            Utils.inputs.push(
                InputWrapper(new_h_value, inpt_counter, new_covered, val))
    # else:
    #     for val in values:
    #         if val.correction in Utils.continuations:
    #             Utils.continuations.remove(val.correction)
    #             # TODO for the moment refill continuations if it runs empty, in future use a wider range of cont.
    #             if not Utils.continuations:
    #                 Utils.continuations = [i for i in string.printable]
    #         inpt_counter += 1
    #         # add for each substitution an element to the prio queue which will be inserted based on the
    #         # heuristic value of the substitution
    #         stack_val = Utils.map_char_stack.get(val.correction)
    #         if stack_val is not None:
    #             new_h_value = h_value.clone()
    #             new_h_value.adjust_value(-stack_val * len(val.inp))
    #             Utils.inputs.push(InputWrapper(new_h_value, inpt_counter, new_covered, val))
    #         else:
    #             Utils.inputs.push(InputWrapper(h_value.clone(), inpt_counter, new_covered, val))
    return inpt_counter
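# Heuristic-adjustment sketch (hypothetical numbers): for a "tokencomp" correction with
# correction = "while" (length 5), cover_counter[0] at or below individual_correction_threshold
# and "while" not contained in any entry of Utils.valid_found, the token usage adjustment is
# 100 * 5 = 500, so the queue entry is pushed with adjust_value(1 + 500) = adjust_value(501);
# a non-token correction of the same length would only get adjust_value(5 * 2) = adjust_value(10).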
def _get_corrections(cmp_stack: List[Any], local_continuations: List[str]):
    """
    Returns the possible substitutions for the character at the last compared index.
    :param cmp_stack: The comparisons made on the last character.
    :param local_continuations: The possible characters to insert if there is no useful continuation
    :return:
    """
    chars = set()
    tok_comp_values = set()
    # the index at which the same token occurred
    # in general the value to substitute should not be correct because we might have missed some comparisons
    # TODO maybe we should remember the last set of found tokencomp values, if the last set was the same, we ignore the flag
    found_token_same_index = None
    for char in cmp_stack:
        if char["operator"] == "switch":
            chars |= _construct_continuation_set(char["operand"], char)
            TokenLearningHandler.add_rhs(char["operand"])
        elif char["operator"] == "strlen":
            length = int(char["operand"][0])
            TokenLearningHandler.add_rhs(["".join(["a" for _ in range(0, length)])])
            chars |= _construct_continuation_set(["".join(["a" for _ in range(0, length)])], char)
        elif char["operator"] == "conversion":
            pos_subst = ConversionHandler.get_possible_substitutions(char["operand"][0])
            TokenLearningHandler.add_rhs(pos_subst)
            chars |= _construct_continuation_set(pos_subst, char)
        elif char["operator"] == "tokencomp":
            # check if really the largest token comparison is used for calculating a substitution
            # if TokenHandler.is_largest_token(char["index"][0]):
            tok_comp_values.add(int(char["operand"][0]))
            pos_subst = set()
            if TokenHandler.get_majority_token(char["index"][0]) == int(char["operand"][0]):
                found_token_same_index = char
            # only use tokencomps for which the majority vote and the lhs value are the same (so those which are likely the actual token comparisons)
            if TokenHandler.get_majority_token(char["index"][0]) == int(char["value"]):
                pos_subst = TokenHandler.get_possible_substitutions(char["operand"][0], char["stack"])
            # for tokencomp we might need to correct lookaheads, thus the index of char is corrected to Utils.max_index
            if char["index"][0] > Utils.max_index:
                char["index"][0] = Utils.max_index
            if pos_subst:
                chars |= _construct_continuation_set(pos_subst, char)
        elif char["operator"] == "tokenstore" or char["operator"] == "assert":
            pass
        else:
            # for smaller and greater the exact range is not defined, so we better learn only the first value in the range
            if ">" in char["operator"]:
                TokenLearningHandler.add_rhs(char["operand"][0])
            elif "<" in char["operator"]:
                TokenLearningHandler.add_rhs(char["operand"][-1])
            else:
                TokenLearningHandler.add_rhs(char["operand"])
            chars |= _construct_continuation_set([random.choice(char["operand"])], char)

    if not chars:
        # for cont in local_continuations:
        #     chars.add(("random"))
        return list()

    if found_token_same_index is not None:
        diff_token = TokenHandler.get_different_correct_token(tok_comp_values)
        if diff_token:
            found_token_same_index["stack"] = ["re-eval"]  # reduce stack size to rank re-calculation higher
            return list(_construct_continuation_set(diff_token, found_token_same_index, False))
        else:
            return list(chars)

    return list(chars)
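# Illustrative sketch (hypothetical comparison entries): a record such as
#   {"operator": "strlen", "operand": ["3"], "index": [0]}
# makes _get_corrections propose the placeholder string "aaa" (three times "a") as a
# continuation, while a record like {"operator": "switch", "operand": ["+", "-"], "index": [0]}
# would add both "+" and "-" to the candidate set via _construct_continuation_set.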