def _write_build_script(path, project, build_dir, make_cmd, dest_dir, label):
    """Write a bash script that rebuilds the corpus and relocates its install tree.

    The script changes into ``build_dir``, runs the project's optional
    ``clean`` and ``configure`` commands, runs ``make_cmd``, removes any
    previous ``dest_dir``, runs the project's ``install`` (and optional
    ``post_install``) command and finally moves ``lava-install`` to
    ``dest_dir``.

    :param path: file the script is written to
    :param project: project configuration mapping (reads ``clean``,
        ``configure``, ``install`` and ``post_install``)
    :param build_dir: directory the script builds in
    :param make_cmd: make command line placed in the script
    :param dest_dir: final location of the installed tree
    :param label: word used in the script's "# Build ... version" comment
    """
    install_dir = join(build_dir, "lava-install")
    script_text = """#!/bin/bash
pushd `pwd`
cd {bugs_build}

# Build {label} version
{make_clean}
{configure}
{make}
rm -rf "{dest_dir}"
{install}
{post_install}
mv lava-install {dest_dir}

popd
""".format(
        bugs_build=build_dir,
        label=label,
        make_clean=project.get("clean", ""),
        configure=project.get("configure", ""),
        make=make_cmd,
        dest_dir=dest_dir,
        install=project["install"].format(install_dir=install_dir),
        post_install=project.get("post_install", ""),
    )
    with open(path, "w") as script_file:
        script_file.write(script_text)


def _write_trigger_script(path, project):
    """Write the executable bash script that replays every fuzzed input.

    For each ``./inputs/*-fuzzed-*`` file the script runs the public build;
    when that exits with a code above 130 it re-runs the internal (logging)
    build and records the input and bug id in ``validated_inputs.txt`` /
    ``validated_bugs.txt`` if the log contains ``LAVALOG: <bugid>``.

    :param path: file the script is written to (made executable afterwards)
    :param project: project configuration mapping (reads ``command``)
    """
    script_text = """#!/bin/bash
rm -rf validated_inputs.txt validated_bugs.txt

trap "echo 'CRASH'" {{3..31}}

for fname in {inputdir}; do
    # Get bug ID from filename (# after last -)
    IFS='-'
    read -ra fname_parts <<< "$fname"
    for i in ${{fname_parts[@]}}; do
        bugid=$i
    done
    IFS=' '
    bugid=${{bugid%.*}}

    #Non-logging version
    LD_LIBRARY_PATH={librarydir2} {command2} &> /dev/null
    code=$?

    if [ "$code" -gt 130 ]; then # Competition version crashed, check log version
        LD_LIBRARY_PATH={librarydir} {command} &> /tmp/comp.txt
        logcode=$?
        if [ "$logcode" -lt 131 ]; then # internal version didn't crash
            echo "UNEXPECTED ERROR ($bugid): competition version exited $logcode while normal exited with $code -- Skipping";
        else
            if grep -q "LAVALOG: $bugid" /tmp/comp.txt; then
                echo $fname >> validated_inputs.txt
                echo $bugid >> validated_bugs.txt
            else
                echo "Competition infrastructure failed on $bugid";
            fi
        fi
    fi
done""".format(
        command=project['command'].format(install_dir="./lava-install-internal",
                                          input_file="$fname"),
        librarydir=join("./lava-install-internal", "lib"),
        librarydir2=join("./lava-install-public", "lib"),
        command2=project['command'].format(install_dir="./lava-install-public",
                                           input_file="$fname"),
        inputdir="./inputs/*-fuzzed-*",
    )
    with open(path, "w") as script_file:
        script_file.write(script_text)
    os.chmod(path, (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
                    stat.S_IROTH | stat.S_IXOTH))


def main():
    """Inject LAVA bugs, validate them and assemble a competition corpus.

    Steps, in order:

    1. Parse the command line and load the project configuration.
    2. Obtain a bug list (``--buglist`` or ``--many``), inject it and
       validate which bugs actually trigger.
    3. Unless ``--chaff`` is given, limit ATP reuse among the validated bugs
       and re-inject if the list changed.
    4. Create ``competition/corpora/lava-corpus-<timestamp>/`` holding a copy
       of the build directory, the fuzzed inputs, an answer key (``ans``),
       ``add_bugs.sql``, the cleaned ``src`` tree and the build/trigger
       scripts, then run the builds and the trigger script.

    :raises RuntimeError: if no bugs are specified, the yield is below
        ``--minYield``, or the trigger script produced no
        ``validated_bugs.txt``.
    :raises AssertionError: if the bug list is empty or injection fails.
    """
    parser = argparse.ArgumentParser(prog="competition.py",
                                     description='Inject and test LAVA bugs.')
    parser.add_argument('host_json', help='Host JSON file')
    parser.add_argument('project', help='Project name')
    parser.add_argument('-m', '--many', action="store", default=-1,
                        help='Inject this many bugs and this many non-bugs (chosen randomly)')
    parser.add_argument('-n', '--minYield', action="store", default=-1,
                        help='Require at least this many real bugs')
    parser.add_argument('-l', '--buglist', action="store", default=False,
                        help='Inject this list of bugs')
    parser.add_argument('-e', '--exitCode', action="store", default=0, type=int,
                        help=('Expected exit code when program exits without crashing. Default 0'))
    # The -i/--diversify option (source diversification) is currently disabled.
    parser.add_argument('-c', '--chaff', action="store_true", default=False,
                        # TODO chaff and unvalidated bugs aren't always the same thing
                        help=('Leave unvalidated bugs in the binary'))
    parser.add_argument('-t', '--bugtypes', action="store", default="rel_write",
                        help=('bug types to inject'))
    parser.add_argument('--version', action="version",
                        version="%(prog)s {}".format(version))

    args = parser.parse_args()
    global project
    project = parse_vars(args.host_json, args.project)
    dataflow = project.get("dataflow", False)  # Default to false

    allowed_bugtypes = get_allowed_bugtype_num(args)

    # Set various paths
    lp = LavaPaths(project)

    # Make the bugs top_dir start with competition
    lp.bugs_top_dir = join(lp.top_dir, "competition")

    compdir = join(lp.top_dir, "competition")
    bugdir = join(compdir, "bugs")

    db = LavaDatabase(project)

    if not os.path.exists(bugdir):
        os.makedirs(bugdir)

    lp.set_bugs_parent(bugdir)

    # Best-effort removal of the bug directory; failures are ignored as before.
    shutil.rmtree(bugdir, ignore_errors=True)

    args.knobTrigger = False
    args.checkStacktrace = False

    # generate a random seed to pass through to lavaTool so it behaves
    # deterministically between runs
    lavatoolseed = random.randint(0, 100000)

    ###############
    ## First we get a list of bugs, either from cli options, or through
    ## competition_bugs_and_non_bugs
    ###############
    if args.buglist:
        print("bug_list incoming %s" % (str(args.buglist)))
        # SECURITY NOTE: eval() executes arbitrary code supplied on the
        # command line; consider ast.literal_eval if only literals are needed.
        # NOTE(review): len(args.buglist) is the length of the *string*, not
        # of the parsed list -- confirm this is what the callee expects.
        bug_list = competition_bugs_and_non_bugs(len(args.buglist), db,
                                                 allowed_bugtypes,
                                                 eval(args.buglist))
    elif args.many:
        # NOTE(review): the default for --many is -1, which is truthy, so the
        # "no bugs specified" branch below is not reached with default
        # arguments -- confirm whether -1 is meant as "no limit".
        bug_list = competition_bugs_and_non_bugs(int(args.many), db,
                                                 allowed_bugtypes, None)
    else:
        print("Fatal error: no bugs specified")
        raise RuntimeError

    assert len(bug_list)  # Found no bugs

    print('bug_list (len={}):'.format(len(bug_list)))
    bug_list_str = ','.join([str(bug_id) for bug_id in bug_list])
    print(bug_list_str)

    ###############
    ## With our bug list in hand, we inject all these bugs and count how many
    ## we can trigger
    ###############

    # add bugs to the source code and check that we can still compile
    (build, input_files, bug_solutions) = inject_bugs(
        bug_list, db, lp, args.host_json, project, args, False,
        dataflow=dataflow, competition=True, validated=False,
        lavatoolseed=lavatoolseed)
    # build is None when injection fails. Could block here to allow for
    # manual patches
    assert build is not None

    # Test if the injected bugs cause appropriate crashes and that our
    # competition infrastructure parses the crashes correctly
    real_bug_list = validate_bugs(bug_list, db, lp, project, input_files,
                                  build, args, False, competition=True,
                                  bug_solutions=bug_solutions)

    if len(real_bug_list) < int(args.minYield):
        print("\n\nXXX Yield too low after injection -- Require at least {} bugs for"
              " competition, only have {}".format(args.minYield, len(real_bug_list)))
        raise RuntimeError("Failure")

    print("\n\n Yield acceptable: {}".format(len(real_bug_list)))

    # TODO- the rebuild process may invalidate a previously validated bug
    # because the trigger will change. Need to find a way to pass data between
    # lavaTool and here so we can reinject *identical* bugs as before

    ###############
    ## After we have a list of validated bugs, we inject again. This time, we
    ## will only inject the bugs we have already validated, so these should
    ## all validate again. Before we reinject these, we'll remove any bugs
    ## from our list that use the same ATP as other bugs we're injecting.
    ###############
    if not args.chaff:
        # re-build just with the real bugs. Inject in competition mode.
        # Deduplicate bugs with the same ATP location
        print("Reinjecting only validated bugs")
        real_bugs = db.session.query(Bug).filter(Bug.id.in_(real_bug_list)).all()
        real_bug_list = limit_atp_reuse(real_bugs)

        # TODO retry a few times if we fail this test
        if bug_list != real_bug_list:  # Only reinject if our bug list has changed
            if len(real_bug_list) < int(args.minYield):
                print("\n\nXXX Yield too low after reducing duplicates -- Require at least {} bugs for"
                      " competition, only have {}".format(args.minYield, len(real_bug_list)))
                raise RuntimeError("Failure")

            (build, input_files, bug_solutions) = inject_bugs(
                real_bug_list, db, lp, args.host_json, project, args, False,
                dataflow=dataflow, competition=True, validated=True,
                lavatoolseed=lavatoolseed)

            assert build is not None  # build is None if injection fails

    ###############
    ## Now build our corpora directory with the buggy source dir, binaries in
    ## lava-install-public, lava-install-internal, and scripts to rebuild the
    ## binaries
    ###############
    corpus_dir = join(compdir, "corpora")
    subprocess32.check_call(["mkdir", "-p", corpus_dir])

    # directory for this corpus
    corpname = "lava-corpus-" + datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    corpdir = join(corpus_dir, corpname)
    subprocess32.check_call(["mkdir", corpdir])

    # Copy lava's builddir into our local build-dir
    lava_bd = join(lp.bugs_parent, lp.source_root)
    bd = join(corpdir, "build-dir")
    shutil.copytree(lava_bd, bd)

    # build internal version; the logging flag is passed through the
    # environment of the make command
    log_build_sh = join(corpdir, "log_build.sh")
    log_make = "CFLAGS=-DLAVA_LOGGING {}".format(project["make"])
    internal_builddir = join(corpdir, "lava-install-internal")
    _write_build_script(log_build_sh, project, bd, log_make,
                        internal_builddir, "internal")

    run_builds([log_build_sh])

    # Corpus directory structure: lava-corpus-[date]/
    #   inputs/
    #   src/
    #   build.sh
    #   log_build.sh
    #   lava-install-internal
    #   lava-install-prod

    # subdir with trigger inputs
    inputsdir = join(corpdir, "inputs")
    subprocess32.check_call(["mkdir", inputsdir])

    # subdir with src -- note we can't create it or copytree will fail!
    srcdir = join(corpdir, "src")
    shutil.copytree(bd, srcdir)

    predictions = []
    bug_ids = []

    for bug in db.session.query(Bug).filter(Bug.id.in_(real_bug_list)).all():
        prediction = basename(bug.atp.loc_filename)
        fuzzed_input = fuzzed_input_for_bug(project, bug)
        fi = os.path.split(fuzzed_input)[1]
        shutil.copy(fuzzed_input, inputsdir)
        predictions.append((prediction, fi, bug.type))
        bug_ids.append(bug.id)

    print("Answer key:")
    with open(join(corpdir, "ans"), "w") as ans:
        for (prediction, fi, bugtype) in predictions:
            print("ANSWER  [%s] [%s] [%s]" % (prediction, fi, Bug.type_strings[bugtype]))
            ans.write("%s %s %s\n" % (prediction, fi, Bug.type_strings[bugtype]))

    with open(join(corpdir, "add_bugs.sql"), "w") as f:
        f.write("/* This file will add all the generated lava_id values to the DB, you must update binary_id */\n")
        # Escaped backslash: psql meta-command, "\s" is not a valid Python escape.
        f.write("\\set binary_id 0\n")
        for bug_id in bug_ids:
            f.write('insert into "bug" ("lava_id", "binary") VALUES (%d, :binary_id); \n' % (bug_id))

    # clean up srcdir before tar
    os.chdir(srcdir)
    try:
        # Unconfigure; best effort, the tree may have no distclean target.
        subprocess32.check_call(["make", "distclean"])
    except (subprocess32.CalledProcessError, OSError):
        pass

    # Delete private files
    deldirs = [join(srcdir, x) for x in [".git", "lava-install"]]
    delfiles = [join(srcdir, x) for x in ["compile_commands.json", "btrace.log"]]

    for dirname in deldirs:
        if os.path.isdir(dirname):
            shutil.rmtree(dirname)
    for fname in delfiles:
        if os.path.exists(fname):
            os.remove(fname)

    # Save the commands we use into files so we can rerun later
    public_build_sh = join(corpdir, "public_build.sh")  # Simple
    public_builddir = join(corpdir, "lava-install-public")
    _write_build_script(public_build_sh, project, bd, project['make'],
                        public_builddir, "public")

    trigger_all_crashes = join(corpdir, "trigger_crashes.sh")
    _write_trigger_script(trigger_all_crashes, project)

    # Build a version to ship in src
    run_builds([log_build_sh, public_build_sh])
    print("Injected {} bugs".format(len(real_bug_list)))

    print("Counting how many crashes competition infrastructure identifies...")
    run_cmd(trigger_all_crashes, cwd=corpdir)  # Prints about segfaults

    # The trigger script appends one bug id per line to validated_bugs.txt.
    validated_path = join(corpdir, "validated_bugs.txt")
    if not os.path.isfile(validated_path):
        raise RuntimeError("Validated bugs file does not exist. Something went wrong")
    with open(validated_path) as validated:
        n = sum(1 for _ in validated)

    print("\tCompetition infrastructure found: %d of %d injected bugs" % (n, len(real_bug_list)))
sys.stdout.flush() try: subprocess32.check_call(fbi_args, env=envv, stdout=sys.stdout, stderr=sys.stderr) except subprocess32.CalledProcessError as e: print("FBI Failed. Possible causes: \n"+ "\tNo DUAs found because taint analysis failed: \n" "\t\t Ensure PANDA 'saw open of file we want to taint'\n" "\t\t Make sure target has debug symbols (version2): No 'failed DWARF loading' messages\n" "\tFBI crashed (bad arguments, config, or other untested code)") raise e print() progress("Found Bugs, Injectable!!") fib_time = tock() print("fib complete %.2f seconds" % fib_time) sys.stdout.flush() db = LavaDatabase(project) print("Count\tBug Type Num\tName") for i in range(len(Bug.type_strings)): n = db.session.query(Bug).filter(Bug.type == i).count() print("%d\t%d\t%s" % (n, i, Bug.type_strings[i])) print("total dua:", db.session.query(Dua).count()) print("total atp:", db.session.query(AttackPoint).count()) print("total bug:", db.session.query(Bug).count())
def main_thread(lock, mon, done_event):
    """Update the curses status screen as a LAVA run progresses.

    The function blocks on the run's log files, one stage at a time, and
    draws timings and statistics with ``addstr``:

    1. source instrumentation (add-queries log) and make,
    2. recording the instrumented program,
    3. replay with taint propagation,
    4. taint analysis / bug-site discovery (polls the database),
    5. bug injection and validation (one trial).

    Afterwards it opens one ``gnome-terminal`` per bug of the latest build
    that has a crashing run, waits for Ctrl-C, sets ``done_event`` and kills
    the ``sleep`` processes keeping those terminals open.

    :param lock: passed through to ``addstr`` for every screen update
    :param mon: passed through to ``addstr`` for every screen update
    :param done_event: event that is set once the wait loop is left
    """
    v0 = 2
    addstr(lock, mon, v0, 11, "LAVA: Large-scale Automated Vulnerability Addition", curses.A_BOLD)
    addstr(lock, mon, v0 + 1, 17, "target: %s" % target_name)

    # stage 1 -- instrument source
    v1 = 5
    wait_for_file(add_queries_log)
    # ok the add queries log file at least exists
    addstr(lock, mon, v1 + 0, 15, "1. Instrument source w/")
    addstr(lock, mon, v1 + 1, 15, " dynamic queries & make")

    # get source lines of code
    sb.check_call(["tar", "-xf", project['tarfile'], '-C', '/tmp'])
    # Text mode so the output can be split on "\n" on Python 2 and 3 alike.
    sloc_report = sb.check_output(['sloccount', "/tmp/%s" % target_name],
                                  universal_newlines=True)
    for line in sloc_report.split("\n"):
        sloc_match = re.search(r"^ansic:\s+([0-9]+) ", line)
        if sloc_match:
            addstr(lock, mon, v0 + 1, 42, "sloc: " + sloc_match.groups()[0])
    time.sleep(0.1)

    # wait for add queries to finish
    pattern = r"add queries complete ([0-9\.]+) seconds"
    wait_for(pattern, add_queries_log)
    t_queries = extract_float(pattern, add_queries_log)

    # grab some neat stats from logfile too: tally up all the queries
    pattern = "num taint queries added ([0-9]+)"
    ntq = sum(int(n) for n in find_in_file_extract(pattern, add_queries_log))
    pattern = "num atp queries added ([0-9]+)"
    natp = sum(int(n) for n in find_in_file_extract(pattern, add_queries_log))
    addstr(lock, mon, v1, 48, "taint queries: %d" % ntq)
    addstr(lock, mon, v1 + 1, 48, " atp queries: %d" % natp)
    time.sleep(0.1)

    # stage 1 (continued) -- make
    wait_for_file(make_log)
    # wait for make to finish
    pattern = r"make complete ([0-9\.]+) seconds"
    wait_for(pattern, make_log)
    t_make = extract_float(pattern, make_log)
    # Instrumentation and make are reported as one combined time.
    addstr(lock, mon, v1, 4, "%4.2fs" % (t_queries + t_make))
    time.sleep(0.1)

    # stage 2 -- run instr program & record
    v2 = 8
    wait_for_file(bug_mining_log)
    addstr(lock, mon, v2, 15, "2. Record run of")
    addstr(lock, mon, v2 + 1, 15, " instrumented program")
    pattern = r"panda record complete ([0-9\.]+) seconds"
    wait_for(pattern, bug_mining_log)
    t_record = extract_float(pattern, bug_mining_log)
    addstr(lock, mon, v2, 4, "%4.2fs" % t_record)

    # stage 3 -- replay + taint
    v3 = 11
    pattern = "Starting first and only replay"
    wait_for(pattern, bug_mining_log)
    addstr(lock, mon, v3, 15, "3. Replay with taint")
    addstr(lock, mon, v3 + 1, 15, " propagation")
    taint_pattern = r"taint analysis complete ([0-9\.]+) seconds"
    done = False
    while not done:
        done = check_for(taint_pattern, bug_mining_log)
        if not done:
            logp("still not done")
        # Show the replay percentage whenever the log reports one.
        pattern = r"([0-9\.]+)\%\) instr"
        if check_for(pattern, bug_mining_log):
            perc = extract_float(pattern, bug_mining_log)
            addstr(lock, mon, v3 + 1, 35, " %4.2f%%" % perc)
        time.sleep(0.11)
    addstr(lock, mon, v3 + 1, 35, " 100.00%")
    time.sleep(0.11)
    # NOTE(review): this write presumably blanks the percentage field --
    # confirm the literal is wide enough to cover it.
    addstr(lock, mon, v3 + 1, 35, " ")

    # interesting stats
    pattern = r":\s*([0-9]+) instrs total"
    wait_for(pattern, bug_mining_log)
    instr_total = extract_int(pattern, bug_mining_log)
    addstr(lock, mon, v3, 48, "instr: %d" % instr_total)
    time.sleep(0.11)

    t_taint = extract_float(taint_pattern, bug_mining_log)
    addstr(lock, mon, v3, 4, "%4.2fs" % t_taint)

    # figure out how big plog is
    assert os.path.isfile(plog)
    plogsize = os.stat(plog).st_size
    addstr(lock, mon, v3 + 1, 48, " plog: %d" % plogsize)
    time.sleep(0.11)

    # stage 4 -- fbi
    v4 = 16
    addstr(lock, mon, v4, 15, "4. Analyze taint & find")
    addstr(lock, mon, v4 + 1, 15, " bug inject sites")
    # poll db to find out how many dua and atp we have; redraw a counter only
    # when its value changed since the previous poll
    last_num_dua = 0
    last_num_atp = 0
    last_num_bug = 0
    pattern = r"fib complete ([0-9\.]+) seconds"
    done = False
    db = LavaDatabase(project)
    while not done:
        done = check_for(pattern, bug_mining_log)
        num_dua = db.session.query(Dua).count()
        num_atp = db.session.query(AttackPoint).count()
        num_bug = db.session.query(Bug).count()
        if num_dua != last_num_dua:
            addstr(lock, mon, v4, 48, " DUAs: %d" % num_dua)
        if num_atp != last_num_atp:
            addstr(lock, mon, v4 + 1, 48, " ATPs: %d" % num_atp)
        if num_bug != last_num_bug:
            addstr(lock, mon, v4 + 2, 48, "pBUGs: %d" % num_bug)
        last_num_dua = num_dua
        last_num_atp = num_atp
        last_num_bug = num_bug
        time.sleep(0.1)

    t_fbi = extract_float(pattern, bug_mining_log)
    addstr(lock, mon, v4, 4, "%4.2fs" % t_fbi)

    # stage 5 inj
    v5 = 20
    for trial in range(1, 2):
        # inject trial $trial
        lf = join(log_dir, "inject-{}.log".format(trial))
        logp(str(trial))
        wait_for_file(lf)
        if trial == 1:
            addstr(lock, mon, v5, 15, "5. Inject bugs &")
            addstr(lock, mon, v5 + 1, 15, " validate")
        vt = v5 + 2 + trial
        addstr(lock, mon, vt, 15, " trial %d (100 bugs):" % trial)

        logp("select")
        # select bugs
        pattern = "INJECTING BUGS (.*) SOURCE"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 40, "I")

        logp("compile")
        # compile
        pattern = "ATTEMPTING BUILD (.*) INJECTED BUG"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 41, "B")

        logp("orig")
        # validate -- does orig input still exit with 0?
        pattern = "buggy program succeeds (.*) original input"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 42, "O")

        logp("validate")
        # validate bugs
        pattern = "FUZZED INPUTS"
        wait_for(pattern, lf)
        addstr(lock, mon, vt, 43, "V")

        logp("yield")
        pattern = r"yield ([0-9\.]+) \("
        wait_for(pattern, lf)
        y = extract_float(pattern, lf)
        # Overwrites the I/B/O/V progress letters with the final yield.
        addstr(lock, mon, vt, 40, "yield: %.2f" % y)

        pattern = r"inject complete ([0-9\.]+) seconds"
        wait_for(pattern, lf)
        t_inject = extract_float(pattern, lf)
        addstr(lock, mon, vt, 4, "%.2fs" % t_inject)

    # Open a terminal for every bug of the newest build that has a run with a
    # crash exit code (134/139 or -6/-11).
    last_build = db.session.query(Build).order_by(-Build.id).limit(1).one()
    terminals = []
    src_dir = join(project_dir, 'bugs', '0', target_name)
    install_dir = join(src_dir, 'lava-install')
    for bug in last_build.bugs:
        crashing_runs = db.session.query(Run)\
            .filter(Run.fuzzed == bug)\
            .filter(Run.build == last_build)\
            .filter(Run.exitcode.in_([134, 139, -6, -11]))\
            .count()
        if crashing_runs <= 0:
            continue

        unfuzzed_input = join(project_dir, 'inputs', basename(project['inputs'][0]))
        suff = get_suffix(unfuzzed_input)
        pref = unfuzzed_input[:-len(suff)] if suff != "" else unfuzzed_input
        fuzzed_input = "{}-fuzzed-{}{}".format(pref, bug.id, suff)
        cmd = project['command'].format(input_file=fuzzed_input, install_dir=install_dir)
        # The trailing sleep keeps the terminal open after the command exits.
        script = "echo RUNNING COMMAND for bug {}:; echo; echo FUZZED INPUT {}; echo; echo -n 'md5sum '; md5sum {}; echo; echo {}; echo; echo; LD_LIBRARY_PATH={} {}; /bin/sleep 1000"\
            .format(bug.id, fuzzed_input, fuzzed_input, cmd, join(install_dir, 'lib'), cmd)
        terminals.append(
            sb.Popen([
                'gnome-terminal', '--geometry=60x24', '-x', 'bash', '-c', script
            ]))

    # Wait for Ctrl-C; sleep between wake-ups so the wait does not spin a CPU.
    try:
        while True:
            time.sleep(0.1)
    except KeyboardInterrupt:
        pass

    done_event.set()

    # killall exits non-zero when no sleep process is left; that is fine.
    try:
        sb.check_call(['killall', 'sleep'])
    except sb.CalledProcessError:
        pass