def parseProj(name):
    """Lift ``name`` with BAP (or reuse a cached dump) and attach sizes.

    The lifted project is cached in ``<name>.bap``. BAP is re-run when that
    file is missing or when ``isMoreRecent(name, fileName)`` is true;
    otherwise the cached file is read and handed to ``parse``.

    Returns:
        A ``(toPerserve, res)`` tuple. ``res`` maps every key of the parsed
        project (presumably instruction addresses -- confirm against
        ``parse``) to ``(size, value)``, where ``size`` is the distance to
        the next key in sorted order. The last key gets size 0 because it
        has no successor. An empty project yields an empty ``res``.
    """
    bil_adt_project_parser = {'load': saveAndParse}
    fileName = '{}.bap'.format(name)

    # If we hadn't analyzed that binary before or the binary has changed...
    if not os.path.isfile(fileName) or isMoreRecent(name, fileName):
        bap_args = ["-dbil.adt"]
        if config.ARCH != config.x86:
            # Non-x86 targets additionally pass the configured base address.
            bap_args.append("--llvm-base={}".format(hex(config.BASE_ADDR)))
        toPerserve, proj = bap.run(name, args=bap_args,
                                   parser=bil_adt_project_parser)
    else:
        print('Loading {} ({} was already parsed)'.format(fileName, name))
        with open(fileName, 'rb') as f:
            data = f.read()
        toPerserve, proj = parse(data)

    # Compute size of instructions: difference between consecutive keys.
    ks = sorted(proj)
    res = {}
    for current, following in zip(ks, ks[1:]):
        res[current] = (following - current, proj[current])
    if ks:
        # Guarded so that an empty project no longer raises IndexError.
        res[ks[-1]] = (0, proj[ks[-1]])
    return toPerserve, res
def getCode(self, addr):
    '''
    Fetch lifted code from addr.

    Returns None when no loaded object contains ``addr``, the entry from
    ``summaries`` when the enclosing function has one, and otherwise the
    result of ``self.parseCode`` applied to the output of ``bap-mc``.
    '''
    loader = self.concrete.ld

    # Fetch object (e.g libc)
    owner = loader.find_object_containing(addr)
    if owner is None:
        return None

    # describe_addr result looks like:
    # '__libc_start_main+0x0 in libc.so.6 (0x18d90)'
    location = loader.describe_addr(addr).split(' ')[0]
    if '+' not in location:
        # No "name+offset" form: report it and terminate the process.
        print('[program.py:getCode TODO - what to do with positive offset???')
        exit()
    func, offset = location.split('+')

    # If there is a summary for the function, use it instead of lifting.
    if func in summaries:
        return summaries[func]

    # bap-mc output is passed through untouched; parseCode handles it later.
    adt_project_parser = {'load': lambda text: text}
    offset = int(offset, 16)
    symbol = owner.symbols_by_name.get(func)
    arch = 'x86_32' if config.ARCH == config.x86 else 'x86_64'

    inside_symbol = (
        symbol is not None
        and addr >= symbol.rebased_addr
        and addr < symbol.rebased_addr + symbol.size
    )
    if inside_symbol:
        # The function contains the address: lift the whole function.
        length = symbol.size
        addr = symbol.rebased_addr
        extra_args = []
    else:
        # Its probably shady stuff. No matter! Read a fixed 15 bytes
        # (presumably the maximum x86 instruction length -- confirm) and
        # let bap-mc lift only one instruction.
        length = 15
        extra_args = ['--only-one']

    raw = self.concrete.read_instructions(addr, length)  # Load code
    text = getStringRepresentation(raw)
    liftedCode = bap.run(
        text,
        bap='bap-mc',
        args=['--show-bil', 'adt', '--arch', arch,
              '--addr', hex(addr), '--show-size'] + extra_args,
        parser=adt_project_parser)
    return self.parseCode(addr, liftedCode)
def run(binary, arch=ARCH_ARM, r2=None, data_path="data/frameworks.dat",
        verbose=False):
    """Detect which known frameworks ``binary`` references.

    Args:
        binary: path handed to ``bap.run``.
        arch: architecture constant; ``ARCH_X86_64`` is rejected.
        r2: unused here; kept for signature compatibility.
        data_path: file passed to ``build_fw_data``.
        verbose: unused here; kept for signature compatibility.

    Returns:
        A list of ``[framework, framework_funcs[framework]]`` pairs, one per
        framework found, in order of first detection.

    Raises:
        UnsupportedBinaryException: if ``arch`` is ``ARCH_X86_64``.
    """
    # Fail fast: there is no point lifting the binary if it is unsupported.
    if arch == ARCH_X86_64:
        raise UnsupportedBinaryException("Unsupported arch: x86_64")

    proj = bap.run(binary)
    _frameworks, framework_funcs, funcs_framework = build_fw_data(data_path)

    detected_frameworks = []
    already_added = set()
    for sub in proj.program.subs:
        # Rewrite the sub name into the key format of funcs_framework:
        # replace the first two characters with "__", turn spaces into
        # underscores and swap the final character for "_".
        name = ("__" + sub.name[2:]).replace(" ", "_")
        name = name[:-1] + "_"

        framework = funcs_framework.get(name)
        if name in funcs_framework and framework not in already_added:
            detected_frameworks.append([framework, framework_funcs[framework]])
            already_added.add(framework)
    return detected_frameworks
def _first_block_address(sub):
    """Return the address of the first block of ``sub`` that has one.

    The "address" attribute is parsed as the integer literal in front of
    the first ':' (base auto-detected). Returns None when no block of the
    sub carries an "address" attribute.
    """
    for blk in sub.blks:
        if "address" in blk.attrs:
            # partition() keeps the whole text when there is no ':',
            # whereas slicing with find() == -1 dropped the last character.
            return int(blk.attrs["address"].partition(":")[0], 0)
    return None


def run(binary, arch, r2=None, verbose=False):
    """Return the names of subs in ``binary`` that nothing jumps to or calls.

    Every sub except ``_main`` and the stub helpers starts as a candidate.
    A candidate is dropped when a direct Call/Goto in a non-stub sub targets
    it, or (when ``r2`` is given) when it appears in the output of ``afl``.

    Args:
        binary: path handed to ``bap.run``.
        arch: ``ARCH_X86_64`` or ``ARCH_ARM``; selects the stubs section.
        r2: optional handle exposing ``cmd(str)``; used to refine the result.
        verbose: log the candidates and every visited sub.

    Returns:
        The set of remaining candidate names.

    Raises:
        Exception: if ``arch`` is neither supported constant.
    """
    logger = logging.getLogger(LOGGER_NAME)

    # Validate the architecture before paying for the BAP run.
    if arch == ARCH_X86_64:
        stubs_section = '__stubs'
    elif arch == ARCH_ARM:
        stubs_section = "__picsymbolstub4"
    else:
        # format() instead of "+" so a non-string arch cannot raise TypeError.
        raise Exception("Unknown arch: {}".format(arch))

    proj = bap.run(binary)
    stubs = proj.sections[stubs_section]
    subs = proj.program.subs

    funcs = {
        sub.name for sub in subs
        if sub.name != "_main"
        and "stub helpers" not in sub.name
        and "stub_helpers" not in sub.name
    }

    if verbose:
        logger.info("Analyser found the current functions in the binary file: ")
        logger.info(funcs)

    for sub in subs:
        if verbose:
            logger.info("[ANALYSER] Sub (%d, %s)", sub.id, sub.name)

        # Ignore stubs: subs whose first known address is at or past the
        # start of the stubs section. A sub without any address is analysed,
        # rather than reusing the previous sub's address.
        faddr = _first_block_address(sub)
        if faddr is not None and faddr >= stubs.beg:
            continue

        for blk in sub.blks:
            for jmp in blk.jmps:
                # Ignore returns: only calls and gotos reference functions.
                if not isinstance(jmp, (bap.bir.Call, bap.bir.Goto)):
                    continue
                target = jmp.target
                if isinstance(target, tuple):
                    if isinstance(target[0], bap.bir.Direct):
                        for item in target:
                            funcs.discard(item.arg.name[1:])
                elif isinstance(target, bap.bir.Direct):
                    funcs.discard(target.arg.name[1:])

    if r2 is not None:
        r2.cmd("af")
        results = r2.cmd("afl")
        for line in results.split("\n"):
            # The name is the last space-separated field, minus "sym.".
            fname = line[(line.rfind(" ") + 1):].replace("sym.", '')
            funcs.discard(fname)
    return funcs
def main():
    """Command-line entry point: verify a source/sink pair on a binary.

    Parses the target filename plus the required ``--src`` and ``--dst``
    options, lifts the file with BAP, and prints the outcome of ``verify``.
    """
    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('filename', help='target filename')
    required_options = (
        ('-s', '--src', 'the source function'),
        ('-d', '--dst', 'the sink function'),
    )
    for short_flag, long_flag, help_text in required_options:
        parser.add_argument(short_flag, long_flag, required=True,
                            help=help_text)
    args = parser.parse_args()

    program = bap.run(args.filename).program
    result = verify(program, args.src, args.dst)

    # A falsy result means verify() found nothing to report.
    if not result:
        print('satisfied')
        return
    print('unsatisfied ({} are reachable via {})'.format(
        str(result[0]), str(result[1])))
def main():
    """Instantiate ``BapTaint`` and exit.

    NOTE(review): the unconditional ``exit()`` makes everything after it
    unreachable. The remaining statements are kept as they were (they look
    like an exploration of the BAP project object) -- confirm whether the
    ``exit()`` or the trailing code should be removed.
    """
    taint_session = BapTaint()
    exit()

    # --- unreachable from here on ---
    print("Running bap on hashmenot")
    proj = bap.run("hashmenot")
    print("Getting project")

    # Arguments
    arg, attrs, constr = proj.arg, proj.attrs, proj.constr
    memmap, program, sections = proj.memmap, proj.program, proj.sections
def main():
    """Parse the command line, lift the target with BAP and report.

    Prints ``satisfied`` when ``verify`` returns a falsy value; otherwise
    prints the first two items of its result.
    """
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('filename', help='target filename')
    cli.add_argument('-s', '--src', help='the source function', required=True)
    cli.add_argument('-d', '--dst', help='the sink function', required=True)
    options = cli.parse_args()

    project = bap.run(options.filename)
    outcome = verify(project.program, options.src, options.dst)

    report = (
        'unsatisfied ({} are reachable via {})'.format(
            str(outcome[0]), str(outcome[1]))
        if outcome else 'satisfied'
    )
    print(report)
# NOTE(review): this `return` is the tail of a function whose header lies
# outside the visible chunk; it is kept untouched.
return syms


# send the found matches to temp file
def find_matches():
    """Write every known function found in the stripped binary to "syms".

    For each entry of the module-level ``functions`` whose ``'code'`` string
    occurs in the ':'-joined hex dump of the stripped ``.text`` section, one
    line ``<name, address, length>`` is written.
    """
    text = stripped.sections['.text']
    # str.encode('hex') only exists on Python 2.
    code = ':'.join(x.encode('hex') for x in stripped.sections['.text'].data)
    # open file
    with open("syms", "w") as fd:
        for func in functions:
            index = code.find(func['code'])
            if index > -1:
                # NOTE(review): `index` is a character offset into the hex
                # dump; the division by 8 presumably converts it to a byte
                # offset -- confirm against the format of func['code'].
                fd.write(
                    "<%s, %s, %d>\n" % (func['name'], hex(text.beg + index / 8), func['length']))


def inject_syms():
    """Inject the symbols listed in "syms" via ``inject.injectSyms``.

    The output name passed along is ``filename`` prefixed with "_".
    """
    # The handle `fd` is opened but not read here; injectSyms is given the
    # file name instead.
    with open("syms") as fd:
        inject.injectSyms(filename, "syms", "_" + filename)


# run
# The target comes from the command line; 'a.out' is lifted as the library.
filename = argv[1]
lib = bap.run('a.out')
symbols = get_symbols()
functions = scan_sections()
stripped = bap.run(filename)
find_matches()
inject_syms()
def main(argv=None, debugging=False, extras=()):
    '''
    Main entry point, allows quick comparison of eval-based adt parser with
    this eval-free adt parser.

    Done by parsing, then comparing objects with ==. Also converts objects
    to strings for char-by-char comparison if the objects don't match, or
    the eval version can/should not be used.

    argv: argument list (defaults to sys.argv). argv[1] is the target handed
        to bap.run. A third element (len(argv) > 2) skips the eval-based
        parser. A fourth element (len(argv) > 3) turns debugging on.
    debugging: when true, cached text files under /tmp are reused if they
        exist, and the strings being compared are saved there.
    extras: extra positional arguments forwarded to every bap.run call.

    Returns None. Returns early when the eval-based and eval-free projects
    compare equal.
    '''
    import os  # this is one of the few test functions needing this module

    # Cache files used in debugging mode; read and written at the same paths.
    estr_path = '/tmp/estr.txt'
    astr_paths = ('/tmp/astr0.txt', '/tmp/astr1.txt', '/tmp/astr2.txt')

    # setup parser struct that uses eval. Do this explicitly so tests always
    # compare against an eval version, even after the code is (hopefully)
    # merged.
    # SECURITY: eval() runs whatever bap emits; only use on trusted input.
    witheval_adt_parser = {
        'format': 'adt',
        'load': lambda s: eval(s, bap.bir.__dict__)  # pylint: disable=eval-used
    }

    if argv is None:
        argv = sys.argv
    toparse = argv[1]
    if not debugging:
        debugging = len(argv) > 3
    logger.debug("debugging = %s", debugging)

    new_proj = None  # stays None when the cached estr makes parsing optional
    if debugging and os.path.exists(estr_path):  # optional optimize
        logger.debug('loading estr.txt')
        with open(estr_path) as fobj:
            estr = fobj.read()
    else:
        skipeval = len(argv) > 2
        origproj = None
        if skipeval:
            logger.info("Calling bap.run(%r, parser=PASSTHRU)", toparse)
            projtxt = bap.run(toparse, *extras, parser={
                'format': 'adt',
                'load': lambda s: s
            })
            if not isinstance(projtxt, str):
                # on python3 projtxt is bytes not str
                estr = projtxt.decode('utf-8')
            else:
                estr = str(projtxt)
            # normalize white space in input
            estr = estr.replace("\n", "")
        else:
            logger.info("Calling bap.run(%r, parser=WITHEVAL)", toparse)
            origproj = bap.run(toparse, *extras, parser=witheval_adt_parser)

        # make sure to do this here not before calling bap the first time
        # Once this runs, if a lot of memory is used, Python can't create
        # child processes in all cases because os.fork() will fail under
        # heavy memory load
        logger.info("Calling bap.run(%r, parser=EVALFREE)", toparse)
        new_proj = bap.run(toparse, *extras, parser=EVALFREE_ADT_PARSER)

        if not skipeval:
            if origproj == new_proj:  # done!
                return
            estr = str(origproj)

    if debugging and all(os.path.exists(path) for path in astr_paths):
        # optionally optimize to test faster
        cached = []
        for path in astr_paths:
            logger.debug('loading %s', os.path.basename(path))
            with open(path) as fobj:
                cached.append(fobj.read())
        astr0, astr1, astr2 = cached
    else:  # normal test path
        if new_proj is None:  # since we may have optimized it out
            logger.info("Calling bap.run(%r, parser=EVALFREE)", toparse)
            # Forward extras here too, matching every other bap.run call.
            new_proj = bap.run(toparse, *extras, parser=EVALFREE_ADT_PARSER)
        astr0, astr1, astr2 = get_proj_strs(new_proj)

    if debugging:  # save for manual inspection
        # Each file gets its own string (astr0.txt used to receive astr1).
        for path, text in zip(astr_paths, (astr0, astr1, astr2)):
            with open(path, 'w') as fobj:
                fobj.write(text)
        with open(estr_path, 'w') as fobj:
            fobj.write(estr)

    _compare_proj_str(estr, (astr0, astr1, astr2))
def test(src, dst):
    """Lift ``../echo`` with BAP and return ``verify``'s result for src/dst."""
    program = bap.run('../echo').program
    return verify(program, src, dst)
def _print_taints(key_filter=None):
    """Print every tainted address (with its comment) for matching keys."""
    for key in taints:
        if key_filter is not None and not key_filter(key):
            continue
        for e in taints[key]:
            print(colored(e, "green") + ": " +
                  r.cmd("CC. @ " + e).strip("\n"))


def _print_taints_best_effort(key_filter=None):
    """Like ``_print_taints`` but never propagates ordinary errors.

    Listing is best effort; failures are reported instead of being silently
    swallowed, and KeyboardInterrupt/SystemExit are no longer caught.
    """
    try:
        _print_taints(key_filter)
    except Exception as err:
        print("Unable to list taints: {}".format(err))


def _clear_taints(keys):
    """Issue ``CC-`` and ``ecH-`` for every address under ``keys``.

    Each listed entry of ``taints`` is reset to an empty list afterwards.
    """
    for key in keys:
        for e in taints[key]:
            r.cmd("CC- @ " + e)
            r.cmd("ecH- @ " + e)
        taints[key] = []


def _print_taint_help(name):
    """Print the help text; ``name`` narrows it to one command family."""
    print(colored("Taint analysis commands using BAP", "yellow"))
    # 0 = overview, 1 = malloc family, 2 = register family, 3 = pointer family
    temp = 0
    if "Tm" in name:
        temp = 1
    elif "Tr" in name:
        temp = 2
    elif "Tp" in name:
        temp = 3

    if temp == 0 or temp == 2:
        print("| Tr" + colored("[?]", "yellow") + " " + colored(
            "Propogate taint from register and mark tainted instructions",
            "green"))
    if temp == 2:
        print("| Trc " + colored(
            "Propogate taint from register and mark tainted calls", "green"))
        print("| Trl " + colored("List taints due to register", "green"))
        print("| Tr- " + colored(
            "Remove taints due to register at current seek", "green"))
        print("| Tr-- " + colored(
            "Remove all taints due to register sources", "green"))
    if temp == 0 or temp == 3:
        print("| Tp" + colored("[?]", "yellow") + " " + colored(
            "Propogate taint from pointer and mark tainted instructions",
            "green"))
    if temp == 3:
        print("| Tpc " + colored(
            "Propogate taint from pointer and mark tainted calls", "green"))
        # Previously described as "register"; Tpl lists pointer taints.
        print("| Tpl " + colored("List taints due to pointer", "green"))
        print("| Tp- " + colored("Remove taints due to pointer", "green"))
        print("| Tp-- " + colored(
            "Remove all taints due to pointer sources", "green"))
    if temp == 0 or temp == 1:
        print("| Tm" + colored("[?]", "yellow") + " " + colored(
            "Propogate taint from mallocs and mark tainted instructions",
            "green"))
    if temp == 1:
        print("| Tmc " + colored(
            "Propogate taint from mallocs and mark tainted calls", "green"))
        print("| Tml " + colored("List taints from mallocs", "green"))
        print("| Tm- " + colored("Remove taints due to mallocs", "green"))
    if temp == 0:
        print("| Tl " + colored("List all taint information", "green"))
        print("| T- " + colored("Remove all taint information", "green"))


def process(command):
    """Process commands here.

    Commands that do not start with "T" are not handled and yield 0. Every
    handled command yields 1. All handled commands except "Ti" finish by
    calling ``rehighlight()``. Unknown "T" commands print the help text.
    """
    if not command.startswith("T"):
        return 0

    cmd = command.split(" ")
    if len(cmd) < 2:
        cmd.append(None)  # commands taking an argument receive None
    name, arg = cmd[0], cmd[1]

    if name == "Ti":
        import bap
        proj = bap.run(binary)
        print(proj.program)
        return 1
    elif name == "Th":
        taint_highlight()
    elif name == "Tp":
        taint_pointer(arg)
    elif name == "Tpc":
        taint_pointer_call(arg)
    elif name == "Tpl":
        _print_taints_best_effort(lambda key: "p" in key)
    elif name == "Tp-":
        # Pointer taints are stored under "<current seek>p".
        _clear_taints([r.cmd("s").strip("\n") + "p"])
    elif name == "Tr":
        taint_register(arg)
    elif name == "Trc":
        taint_register_call(arg)
    elif name == "Trl":
        _print_taints_best_effort(
            lambda key: "p" not in key and "malloc" not in key)
    elif name == "Tr-":
        # Register taints are stored under the current seek itself.
        _clear_taints([r.cmd("s").strip("\n")])
    elif name == "Tr--":
        _clear_taints(
            [key for key in taints if key != "malloc" and "p" not in key])
    elif name == "Tp--":
        _clear_taints([key for key in taints if "p" in key])
    elif name == "Tm":
        taint_malloc(arg)
    elif name == "Tmc":
        taint_malloc_call(arg)
    elif name == "Tml":
        for e in taints["malloc"]:
            print(colored(e, "green") + ": " +
                  r.cmd("CC. @ " + e).strip("\n"))
    elif name == "Tm-":
        _clear_taints(["malloc"])
    elif name == "T-":
        _clear_taints(list(taints))
    elif name == "Tl":
        _print_taints_best_effort()
    else:
        _print_taint_help(name)

    rehighlight()
    return 1