def gather_plot_data(problem, problemset, rers_basepath, afl_basepath):
    """Build the step-plot series of distinct errors reached by AFL over time.

    Args:
        problem: Problem name; used as the directory name, the AFL binary name
            and the stem of the ``.so`` file.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        afl_basepath: Root directory under which the AFL directories live.

    Returns:
        A pair ``(all_times, all_counts)`` of equally long lists. Every entry
        of ``all_times`` is a time minus the AFL start time: first the start
        itself, then each distinct moment at which a new error was first
        reached, then the last time reported by AFL. ``all_counts`` holds the
        number of distinct errors known at each of those moments, starting at
        0 and repeating the final total for the closing point. When no error
        was reached at all, the result is the flat two-point series
        ``([start - start, last - start], [0, 0])``.
    """
    rers_path = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    afl_dir = f'{afl_basepath}/{problemset}/{problem}'
    bin_path = f'{afl_basepath}/{problemset}/{problem}/{problem}'

    sul = RERSSOConnector(rers_path)
    aflutils = AFLUtils(afl_dir, bin_path,
                        [str(x) for x in sul.get_alphabet()], sul)

    reached = aflutils.gather_reached_errors(return_time_date=True)

    # Filter reached so only the earliest occurrence of each error counts
    first_reached = {}
    for error, reached_at in reached:
        if error not in first_reached or first_reached[error] > reached_at:
            first_reached[error] = reached_at

    # Walk the first-hit times in ascending order: the k-th earliest error
    # brings the running total to k. Errors sharing a timestamp collapse into
    # one entry that keeps the highest total, and because the keys are
    # inserted in ascending order the dict is already sorted by time.
    count_at = {}
    for n_found, reached_at in enumerate(sorted(first_reached.values()),
                                         start=1):
        count_at[reached_at] = n_found

    times = list(count_at)
    counts = list(count_at.values())

    # Get some time info from the AFL directory
    start_time = aflutils.get_start_date_time()
    last_time = aflutils.get_last_date_time()

    # Express everything relative to the start of the AFL run (not relative
    # to the first error hit), so the plot begins at the origin.
    rel_start_time = start_time - start_time
    rel_times = [reached_at - start_time for reached_at in times]
    rel_last_time = last_time - start_time

    # Counts only ever grow, so the last one is the total; with no errors at
    # all the curve simply stays at zero instead of raising.
    final_count = counts[-1] if counts else 0

    all_times = [rel_start_time] + rel_times + [rel_last_time]
    all_counts = [0] + counts + [final_count]

    return all_times, all_counts
def check_reached(problem, problemset, rers_basepath, afl_basepath):
    """Return the set of error ids (as strings) found by AFL for one problem.

    Args:
        problem: Problem name; also the name of the AFL binary and ``.so`` file.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        afl_basepath: Root directory under which the AFL directories live.

    Returns:
        A set of strings: every entry reported by
        ``AFLUtils.gather_reached_errors()`` with each ``error_`` occurrence
        removed.
    """
    so_file = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    problem_dir = f'{afl_basepath}/{problemset}/{problem}'
    binary = f'{afl_basepath}/{problemset}/{problem}/{problem}'

    connector = RERSSOConnector(so_file)
    alphabet = [str(symbol) for symbol in connector.get_alphabet()]
    utils = AFLUtils(problem_dir, binary, alphabet, connector)

    return {
        re.sub('error_', '', name)
        for name in utils.gather_reached_errors()
    }
def __init__(self, watch_dir, problem, problemset, rers_basepath, fuzzer_basepath):
    """Set up a directory watcher plus the corpus utilities for one problem.

    Args:
        watch_dir: Directory to observe (non-recursively) for new files.
        problem: Problem name; also the stem of the ``.so`` file and of the
            ``<problem>_fuzz`` fuzzer binary.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        fuzzer_basepath: Root directory under which the fuzzer directories live.
    """
    # Setup event handler: match every path, ignore nothing, include
    # directories, case-insensitive. Passed by keyword (with pattern lists
    # rather than bare strings) so the call does not depend on the positional
    # order of the handler's parameters.
    self.event_handler = PatternMatchingEventHandler(
        patterns=["*"],
        ignore_patterns=[],
        ignore_directories=False,
        case_sensitive=False)
    self.event_handler.on_created = self._on_created

    # Create observer
    self.observer = Observer(timeout=10)
    self.observer.schedule(self.event_handler, watch_dir, recursive=False)
    # Override the size limit of the observer's event queue.
    self.observer.event_queue.maxsize = 100000

    self.watch_dir = watch_dir

    rers_path = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    fuzzer_dir = Path(f'{fuzzer_basepath}/{problemset}/{problem}')

    sul = RERSSOConnector(rers_path)
    self.cutils = CorpusUtils(
        corpus_path=fuzzer_dir.joinpath('corpus'),
        fuzzer_path=fuzzer_dir.joinpath(f'{problem}_fuzz'),
        sul=sul)

    # Bookkeeping for the errors observed so far.
    self.errors_seen = set()
    self.last_n_errors_seen = 0
def check_reached(problem, problemset, rers_basepath, afl_basepath):
    """Print which error ids AFL reached and which reachable ones it missed.

    The reachable ids are read from ``reachability-solution-<problem>.csv``
    located next to the problem's ``.so`` file.

    Args:
        problem: Problem name; also the name of the AFL binary and ``.so`` file.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        afl_basepath: Root directory under which the AFL directories live.

    Returns:
        None. Three lines are printed: the reached ids, the reachable ids that
        were not reached, and ``<#reached>/<#reachable>``.
    """
    rers_path = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    afl_dir = f'{afl_basepath}/{problemset}/{problem}'
    bin_path = f'{afl_basepath}/{problemset}/{problem}/{problem}'

    sul = RERSSOConnector(rers_path)
    aflutils = AFLUtils(afl_dir, bin_path,
                        [str(x) for x in sul.get_alphabet()], sul)

    reached = {
        int(re.sub('error_', '', x))
        for x in aflutils.gather_reached_errors()
    }

    # Only the reachable half of the solution is needed here.
    # NOTE(review): assumes parse_csv yields ids comparable with ints - confirm.
    reachable, _ = parse_csv(
        Path(rers_path).parent.joinpath(
            f'reachability-solution-{problem}.csv'))
    reachable = set(reachable)

    print("Reached:", reached)
    # Plain difference: a symmetric difference would also list ids that were
    # reached but are absent from the solution file under "Not reached".
    print("Not reached:", reachable - reached)
    print(f'{len(reached)}/{len(reachable)}')
def check_reached_libfuzzer(problem, problemset, rers_basepath, fuzzer_basepath):
    """Return the plot data of the libFuzzer ``corpus_errors`` corpus.

    Args:
        problem: Problem name; also the stem of the ``.so`` file and of the
            ``<problem>_fuzz`` fuzzer binary.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        fuzzer_basepath: Root directory under which the fuzzer directories live.

    Returns:
        Whatever ``CorpusUtils.get_plot_data()`` returns for that corpus.

    Raises:
        AssertionError: If the problem's fuzzer directory does not exist.
    """
    so_file = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    workdir = Path(f'{fuzzer_basepath}/{problemset}/{problem}')
    assert workdir.exists(), workdir

    connector = RERSSOConnector(so_file)
    corpus = CorpusUtils(
        corpus_path=workdir / 'corpus_errors',
        fuzzer_path=workdir / f'{problem}_fuzz',
        sul=connector,
    )
    return corpus.get_plot_data()
def check_reached(problem, problemset, rers_basepath, fuzzer_basepath):
    """Return the set of integer error ids present in a libFuzzer corpus.

    Args:
        problem: Problem name; also the stem of the ``.so`` file and of the
            ``<problem>_fuzz`` fuzzer binary.
        problemset: Name of the problem-set directory that contains ``problem``.
        rers_basepath: Root directory under which the RERS ``.so`` files live.
        fuzzer_basepath: Root directory under which the fuzzer directories live.

    Returns:
        A set of ints, one per distinct extracted output that starts with
        ``error`` (with ``error_`` stripped before conversion).

    Raises:
        AssertionError: If the problem's fuzzer directory does not exist.
    """
    so_file = f"{rers_basepath}/{problemset}/{problem}/{problem}.so"
    workdir = Path(f'{fuzzer_basepath}/{problemset}/{problem}')
    assert workdir.exists(), workdir

    connector = RERSSOConnector(so_file)
    corpus = CorpusUtils(
        corpus_path=workdir / 'corpus',
        fuzzer_path=workdir / f'{problem}_fuzz',
        sul=connector,
    )

    # First pass: run the extraction over every testcase, dropping the ones
    # for which nothing was extracted.
    outputs = []
    for testcase in corpus.gather_testcases():
        output = corpus.extract_error(testcase)
        if output is not None:
            outputs.append(output)

    # Second pass: keep the error outputs and convert them to numeric ids.
    return {
        int(re.sub('error_', '', output))
        for output in outputs
        if output.startswith('error')
    }
{ 'membership_query': 0, 'equivalence_query': 0, 'test_query': 0, 'state_count': 0, 'error_count': 0, 'errors': set() }, log_path=f'{problem}_nomutation.log', write_on_change={'errors'}) # Try to learn a state machine for one of the RERS problems #problem = "Problem12" problemset = "TrainingSeqReachRers2019" sul = RERSSOConnector(f"../../rers/{problemset}/{problem}/{problem}.so") eqc = StackedChecker( # MutationEquivalenceChecker(sul, ct, # target_pop_size=100000, # cluster=False, # ), SmartWmethodEquivalenceCheckerV4(sul, horizon=12, stop_on={'invalid_input'}, stop_on_startswith={'error'}, order_type='ce count')) # Store found counterexamples def onct(ctex):
# Log directory for this problem's AFL-with-traces run; created on demand.
logdir = Path(f'./logs/{problem}_afl_wtraces')
logdir.mkdir(parents=True, exist_ok=True)

# Statistics tracked during the run, all starting at zero/empty. The log file
# name carries the problem name and the `now` value.
statstracker = StatsTracker(
    {
        'membership_query': 0,
        'equivalence_query': 0,
        'test_query': 0,
        'state_count': 0,
        'error_count': 0,
        'errors': set()
    },
    log_path=logdir.joinpath(f'{problem}_{now}_afl_wtraces.log'),
    write_on_change={'state_count', 'error_count'})

# System under learning.
sul = RERSSOConnector(path)

# NOTE(review): machine-specific absolute paths to the AFL directory and the
# instrumented binary for this problem.
afl_dir = f'/home/tom/projects/lstar/experiments/learningfuzzing/{problemset}/{problem}'
bin_path = f'/home/tom/projects/lstar/experiments/learningfuzzing/{problemset}/{problem}/{problem}'

# Two equivalence checkers combined in one StackedChecker: the AFL-based
# checker listed first, the W-method checker second (presumably consulted in
# that order - confirm against StackedChecker).
eqc = StackedChecker(
    AFLEquivalenceCheckerV2(sul, afl_dir, bin_path, feedback='w_traces'),
    SmartWmethodEquivalenceCheckerV2(sul,
                                     horizon=horizon,
                                     stop_on={'invalid_input'},
                                     stop_on_startswith={'error'}))

# Set up the teacher, with the system under learning and the equivalence checker
teacher = Teacher(sul, eqc)

# Set up the learner who only talks to the teacher
learner = TTTMealyLearner(teacher)
self.sul.reset() new_output = self.sul.process_input(trimmed_trace) assert og_output == new_output assert 'error' in og_output return trimmed_trace if __name__ == "__main__": problem = "Problem12" problemset = "TrainingSeqReachRers2019" path = f"/home/tom/projects/lstar/rers/{problemset}/{problem}/{problem}.so" sul = RERSSOConnector(path) afl_dir = f'/home/tom/projects/lstar/afl/{problemset}/{problem}' bin_path = f'/home/tom/projects/lstar/afl/{problemset}/{problem}/{problem}' aflutils = AFLUtils(afl_dir, bin_path, [str(x) for x in sul.get_alphabet()], sul) reached = [ int(re.sub('error_', '', x)) for x in aflutils.gather_reached_errors() ] reachable, unreachable = parse_csv( Path(path).parent.joinpath(f'reachability-solution-{problem}.csv')) print("Reached:", set(reached))
from suls.caches.dictcache import DictCache
from suls.caches.rerstriecache import RersTrieCache
from suls.caches.triecache import TrieCache
from suls.rersconnectorv4 import RERSConnectorV4
from suls.rerssoconnector import RERSSOConnector
import pickle

# Benchmark configuration: which problem to load, where the caches store
# their data, and how many random queries (n) of which length (w) to build.
problem = "Problem12"
cache = 'cache'
path = f'../rers/TrainingSeqReachRers2019/{problem}/{problem}'
n = 100000
w = 100

# Two caches around separate connectors to the same shared object, so both
# cache implementations can be exercised with the same queries.
trie_cached_sul = TrieCache(RERSSOConnector(f'{path}.so'), storagepath=cache)
dict_cached_sul = DictCache(RERSSOConnector(f'{path}.so'), storagepath=cache)

# Generate a bunch of random keys: an (n, w) array of alphabet symbols, with
# each row turned into a tuple.
alphabet = trie_cached_sul.get_alphabet()
quers = choice(alphabet, (n, w))
queries = [tuple(row) for row in quers]
print("done generating")

# From here on the queries are consumed one at a time.
queries = iter(queries)

trie_mem = []
dict_mem = []
count = 0
# Log directory for this problem's W-method run; created on demand.
logdir = Path(f'./logs/{problem}_wmethod')
logdir.mkdir(parents=True, exist_ok=True)

# Statistics tracked during the run, all starting at zero/empty. The log file
# name carries the problem name and the `now` value.
statstracker = StatsTracker(
    {
        'membership_query': 0,
        'equivalence_query': 0,
        'test_query': 0,
        'state_count': 0,
        'error_count': 0,
        'errors': set()
    },
    log_path=logdir.joinpath(f'{problem}_{now}.log'),
    write_on_change={'state_count', 'error_count'})

# System under learning.
sul = RERSSOConnector(so_path)

# NOTE(review): machine-specific absolute path to the libFuzzer benchmark
# directory; the corpus and the fuzzer binary are looked up below it.
basepath = Path("/home/tom/afl/thesis_benchmark_3/libFuzzer/").joinpath(
    problemset).joinpath(problem)
corpus_path = basepath.joinpath("corpus")
fuzzer_path = basepath.joinpath(f"{problem}_fuzz")

# eqc = LibFuzzerEquivalenceChecker(sul,
#                                   corpus_path=corpus_path,
#                                   fuzzer_path=fuzzer_path,
#                                   eqchecktype=EQCheckType.BOTH,
#                                   enable_dtraces=False,
#                                   minimize=False)
# eqc = SmartWmethodEquivalenceCheckerV4(sul,
#                                        horizon=13,
#                                        stop_on={'invalid_input'},
#                                        stop_on_startswith={'error'},
statstracker = StatsTracker( { 'membership_query': 0, 'equivalence_query': 0, 'test_query': 0, 'state_count': 0, 'error_count': 0, 'errors': set() }, log_path=f'{problem}_normal_{horizon}.log', write_on_change={'errors'}) # Setup counterexample tracking ct = CounterexampleTracker() sul = RERSSOConnector(problem_path) # We use a specialized W-method equivalence checker which features # early stopping on invalid inputs, which speeds things up a lot eqc = SmartWmethodEquivalenceCheckerV4(sul, horizon=horizon, stop_on={'invalid_input'}, stop_on_startswith={'error'}, order_type='ce count') # eqc = WmethodHorizonEquivalenceChecker(sul, 3) # Store found counterexamples def onct(ctex): ct.add(ctex)
from suls.caches.triecache import TrieCache
from suls.rersconnectorv3 import RERSConnectorV3
from suls.rersconnectorv4 import RERSConnectorV4
from suls.rersconnectorv2 import RERSConnectorV2
from numpy.random import choice
import time
from pygtrie import StringTrie
from suls.rerssoconnector import RERSSOConnector

# One instance of every connector flavour, all pointed at the same problem.
path = '../rers/TrainingSeqReachRers2019/Problem11/Problem11'
r1 = RERSConnectorV2(path, terminator='-1')
r2 = RERSConnectorV3(path)
r3 = RERSConnectorV4(path)
r4 = RERSSOConnector(f'{path}.so')
# r5 = TrieCache(r3)

alphabet = r1.get_alphabet()

# n random inputs of w symbols each.
n = 1000
w = 1000

print("generating testcases")
# Generate a bunch of random keys
inputs = [list(choice(alphabet, w)) for _ in range(n)]
print("done generating")
output = self.sul.process_input(c) # print(output) if 'error' in output: cs.add(output) return cs if __name__ == "__main__": import matplotlib.pyplot as plt problem = "Problem17" path = Path(f'/home/tom/afl/libfuzz/SeqReachabilityRers2020/{problem}') # path = Path(f'/home/tom/projects/lstar/libfuzzer/TrainingSeqReachRers2019/{problem}') assert path.exists() sul = RERSSOConnector( f'/home/tom/projects/lstar/rers/SeqReachabilityRers2020/{problem}/{problem}.so' ) # sul = RERSSOConnector(f'/home/tom/projects/lstar/rers/TrainingSeqReachRers2019/{problem}/{problem}.so') cutils = CorpusUtils(corpus_path=path.joinpath('corpus'), fuzzer_path=path.joinpath(f'{problem}_fuzz'), sul=sul) # minimized_dir = cutils.minimize_corpus() n_reached, times_reached = cutils.get_plot_data() plt.step([x / 3600 for x in times_reached], n_reached) plt.show() # # crashing_inputs = cutils.gather_crashes() # # reached_errs = [x.replace('error_', '') for x in cutils.extract_errors(crashing_inputs)]