Пример #1
0
def parseProj(name):
	''' Lift the binary `name` with BAP, or reuse its '<name>.bap' dump.

	Returns a tuple (toPerserve, res). `toPerserve` is handed back exactly as
	the parser produced it. `res` maps every key of the parsed project to a
	pair (size, value): size is the distance to the next key in sorted order
	and 0 for the last key. An empty project yields an empty `res`.
	'''
	bil_adt_project_parser = {'load' : saveAndParse}

	fileName = '{}.bap'.format(name)

	#If we hadn't analyzed that binary before or the binary has changed...
	if not os.path.isfile(fileName) or isMoreRecent(name, fileName):
		bapArgs = ["-dbil.adt"]
		if config.ARCH != config.x86:
			#Every non-x86 target is additionally given the configured base address
			bapArgs.append("--llvm-base={}".format(hex(config.BASE_ADDR)))
		toPerserve, proj = bap.run(name, args=bapArgs, parser=bil_adt_project_parser)
	else:
		print('Loading {} ({} was already parsed)'.format(fileName, name))
		with open(fileName,'rb') as f:
			data = f.read()
		toPerserve, proj = parse(data)

	#presumably the keys are instruction addresses - TODO confirm against parse()
	ks = sorted(proj)
	res = {}

	#Size of an entry = gap between its key and the following one
	for cur, nxt in zip(ks, ks[1:]):
		res[cur] = (nxt-cur, proj[cur])

	#The last entry has no successor, so its size is reported as 0.
	#Guarded so that an empty project no longer raises IndexError.
	if ks:
		res[ks[-1]] = (0, proj[ks[-1]])
	return toPerserve, res
Пример #2
0
	def getCode(self, addr):
		''' Return the lifted code covering addr, a function summary, or None.

		None is returned when no loaded object contains addr. When the symbol
		name resolved for addr has an entry in `summaries`, that entry is
		returned instead of lifting anything. Otherwise the bytes are lifted
		with bap-mc and passed through self.parseCode.
		'''

		loader = self.concrete.ld

		#Find the loaded object (e.g libc) holding the address
		obj = loader.find_object_containing(addr)
		if obj is None:
			return None

		#describe_addr yields e.g. '__libc_start_main+0x0 in libc.so.6 (0x18d90)'
		func = loader.describe_addr(addr).split(' ')[0]
		if '+' not in func:
			print('[program.py:getCode TODO - what to do with positive offset???')
			exit()
		func,offset = func.split('+')

		#A summarized function is never lifted
		if func in summaries:
			return summaries[func]

		#bap-mc output is handed back verbatim; parseCode does the real parsing
		adt_project_parser = {'load' : (lambda x: x)}

		#Only validates that the offset is hexadecimal; the value itself is not used below
		offset = int(offset, 16)
		s = obj.symbols_by_name.get(func)

		arch = 'x86_32' if config.ARCH == config.x86 else 'x86_64'

		if s is not None and addr >= s.rebased_addr and addr < s.rebased_addr+s.size:
			#addr lies inside the symbol: lift the whole function from its start
			length = s.size
			addr = s.rebased_addr
			extraArgs = []
		else: #Its probably shady stuff. No matter!
			#Read a fixed 15 bytes and ask bap-mc for a single instruction only
			length = 15
			extraArgs = ['--only-one']

		code = getStringRepresentation(self.concrete.read_instructions(addr, length))
		bapArgs = ['--show-bil','adt', '--arch',arch, '--addr', hex(addr),'--show-size'] + extraArgs
		liftedCode = bap.run(code, bap='bap-mc', args=bapArgs, parser=adt_project_parser)

		return self.parseCode(addr, liftedCode)
def run(binary,
        arch=ARCH_ARM,
        r2=None,
        data_path="data/frameworks.dat",
        verbose=False):
    """Detect which known frameworks the subroutines of `binary` belong to.

    Args:
        binary: path handed to ``bap.run``.
        arch: target architecture constant; ``ARCH_X86_64`` is rejected.
        r2: accepted for interface compatibility; not used here.
        data_path: file handed to ``build_fw_data``.
        verbose: accepted for interface compatibility; not used here.

    Returns:
        A list of ``[framework, framework_funcs[framework]]`` pairs, one per
        detected framework, in order of first appearance among the
        subroutines.

    Raises:
        UnsupportedBinaryException: if ``arch`` is ``ARCH_X86_64``.
    """
    # Reject unsupported targets before paying for the BAP run and data load.
    if arch == ARCH_X86_64:
        raise UnsupportedBinaryException("Unsupported arch: x86_64")

    proj = bap.run(binary)
    _, framework_funcs, funcs_framework = build_fw_data(data_path)

    detected_frameworks = []
    seen_frameworks = set()

    for sub in proj.program.subs:
        # Rewrite the subroutine name into the key format used by the data
        # file: replace the first two characters with "__", turn spaces into
        # underscores and swap the final character for "_".
        name = ("__" + sub.name[2:]).replace(" ", "_")
        name = name[:-1] + "_"

        if name not in funcs_framework:
            continue
        framework = funcs_framework[name]
        if framework in seen_frameworks:
            continue  # report each framework only once

        detected_frameworks.append([framework, framework_funcs[framework]])
        seen_frameworks.add(framework)

    return detected_frameworks
def run(binary, arch, r2 = None, verbose = False):
	"""Return the subroutine names of `binary` that nothing jumps to directly.

	Every subroutine name except "_main" and the stub helpers starts out as a
	candidate. A candidate is dropped when a Call/Goto in a non-stub
	subroutine targets it directly (the target's tid name minus its first
	character), or when radare2's function list (`afl`) mentions it.

	Args:
		binary: path handed to ``bap.run``.
		arch: ``ARCH_X86_64`` or ``ARCH_ARM``; selects the stub section name.
		r2: optional object with a ``cmd`` method (presumably an r2pipe
			session); when given, its `afl` output prunes further candidates.
		verbose: log the candidate set and every analysed subroutine.

	Returns:
		The set of remaining candidate names.

	Raises:
		Exception: if ``arch`` is neither of the two supported constants.
	"""
	logger = logging.getLogger(LOGGER_NAME)

	# Resolve the stub section name first so an unknown arch fails before BAP runs.
	if arch == ARCH_X86_64:
		stub_section = '__stubs'
	elif arch == ARCH_ARM:
		stub_section = "__picsymbolstub4"
	else:
		# format() instead of "+" so a non-string arch constant still yields this message
		raise Exception("Unknown arch: {}".format(arch))

	proj = bap.run(binary)
	stubs = proj.sections[stub_section]
	subs = proj.program.subs

	funcs = {
		sub.name for sub in subs
		if sub.name != "_main" and "stub helpers" not in sub.name and "stub_helpers" not in sub.name
	}

	if verbose:
		logger.info("Analyser found the current functions in the binary file: ")
		logger.info(funcs)

	for sub in subs:
		if verbose:
			logger.info("[ANALYSER] Sub (%d, %s)", sub.id, sub.name)

		# Address of the first block that carries one. Reset for every sub so a
		# sub without any addressed block neither raises nor reuses the address
		# of the previous sub.
		faddr = None
		for blk in sub.blks:
			if "address" in blk.attrs:
				address = blk.attrs["address"]
				# the attribute holds text up to a ":" that int(..., 0) can parse
				faddr = int(address[:address.find(":")], 0)
				break

		# ignore stubs: everything at or beyond the start of the stub section
		if faddr is not None and faddr >= stubs.beg:
			continue

		for blk in sub.blks:
			for jmp in blk.jmps:
				# ignore returns
				if not isinstance(jmp, (bap.bir.Call, bap.bir.Goto)):
					continue

				target = jmp.target
				if isinstance(target, tuple):
					# As before, a tuple only counts when its first entry is a
					# direct label; non-Direct entries are now skipped instead
					# of being dereferenced as if they were labels.
					if target and isinstance(target[0], bap.bir.Direct):
						for entry in target:
							if isinstance(entry, bap.bir.Direct):
								funcs.discard(entry.arg.name[1:])
				elif isinstance(target, bap.bir.Direct):
					funcs.discard(target.arg.name[1:])

	if r2 is not None:
		r2.cmd("af")

		# The function name is the last space-separated field of each `afl` line
		for line in r2.cmd("afl").split("\n"):
			funcs.discard(line[(line.rfind(" ") + 1):].replace("sym.", ''))

	return funcs
def main():
    """Parse the command line, lift the target with BAP and print the verify() verdict.

    Prints 'satisfied' when verify() returns a falsy result, otherwise an
    'unsatisfied' line built from the first two items of the result.
    """
    arg_parser = argparse.ArgumentParser(description=description)
    arg_parser.add_argument('filename', help='target filename')
    for short_flag, long_flag, help_text in (
            ('-s', '--src', 'the source function'),
            ('-d', '--dst', 'the sink function')):
        arg_parser.add_argument(short_flag, long_flag, required=True, help=help_text)
    opts = arg_parser.parse_args()

    project = bap.run(opts.filename)
    found = verify(project.program, opts.src, opts.dst)
    if not found:
        print('satisfied')
        return

    print('unsatisfied ({} are reachable via {})'.format(str(found[0]), str(found[1])))
Пример #6
0
def main():
    """Construct a BapTaint and exit.

    NOTE(review): every statement after exit() is unreachable as written; it
    looks like a debugging short-circuit that was left in - confirm before
    relying on the BAP run below.
    """
    taint = BapTaint()
    exit()

    # ---- unreachable from here on (see note above) ----
    print("Running bap on hashmenot")
    project = bap.run("hashmenot")
    print("Getting project")

    # Components of the project; none of them is used afterwards
    arg, attrs, constr = project.arg, project.attrs, project.constr
    memap, program, sections = project.memmap, project.program, project.sections
Пример #7
0
def main():
    """Command-line entry point: check reachability between two functions.

    Reads the target filename plus the required --src/--dst function names,
    runs BAP on the target and prints the outcome of verify().
    """
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('filename', help='target filename')
    cli.add_argument('-s', '--src', required=True, help='the source function')
    cli.add_argument('-d', '--dst', required=True, help='the sink function')
    options = cli.parse_args()

    program = bap.run(options.filename).program
    outcome = verify(program, options.src, options.dst)

    if outcome:
        first, second = str(outcome[0]), str(outcome[1])
        message = 'unsatisfied ({} are reachable via {})'.format(first, second)
    else:
        message = 'satisfied'
    print(message)
Пример #8
0
    return syms


# send the found matches to temp file
def find_matches():
    """Write one "<name, address, length>" line to the "syms" file per matched function.

    The .text section of the module-level `stripped` project is rendered as
    ':'-separated hex and searched for the 'code' string of every entry in
    the module-level `functions`; entries that are not found are skipped.
    """
    text = stripped.sections['.text']
    # str.encode('hex') exists on Python 2 only
    hex_bytes = [byte.encode('hex') for byte in text.data]
    code = ':'.join(hex_bytes)

    with open("syms", "w") as out:
        for func in functions:
            position = code.find(func['code'])
            if position < 0:
                continue  # str.find reports a miss as -1
            # NOTE(review): each byte takes 3 characters in `code` ("xx:"), so
            # dividing the character index by 8 looks suspicious - confirm
            # against the format of func['code'].
            record = (func['name'], hex(text.beg + position / 8), func['length'])
            out.write("<%s, %s, %d>\n" % record)


def inject_syms():
    """Inject the symbols listed in the "syms" file into the target binary.

    The result is written under the target's filename prefixed with "_".
    """
    # The handle itself is unused; opening the file makes a missing "syms"
    # fail here, before injectSyms is called.
    with open("syms") as _handle:
        inject.injectSyms(filename, "syms", "_" + filename)


# run
# Script driver. The steps communicate through module-level globals, so their
# order matters.
filename = argv[1]  # target binary; read by inject_syms()
lib = bap.run('a.out')  # reference binary, hard-coded to 'a.out'
# presumably get_symbols()/scan_sections() read `lib` - defined outside this view
symbols = get_symbols()
functions = scan_sections()  # read by find_matches()
stripped = bap.run(filename)  # read by find_matches()
find_matches()  # writes the "syms" file
inject_syms()  # consumes the "syms" file
Пример #9
0
def main(argv=None, debugging=False, extras=()):
    '''
    Main entry point, allows quick comparison of eval-based adt parser with this
    eval-free adt parser.

    Done by parsing, then comparing objects with ==.

    Also converts objects to strings for char-by-char comparison if the objects
    don't match, or the eval version can/should not be used.

    argv defaults to sys.argv; argv[1] is the file handed to bap.run.  More
    than two entries in argv skip the eval-based parse, more than three turn
    debugging on.  extras are passed to every bap.run call as additional
    positional arguments.  Returns None.
    '''
    import os  # this is one of the few test functions needing this module

    # setup parser struct that uses eval.  Do this explicitly so tests always
    # compare against an eval version, even after the code is (hopefully) merged
    witheval_adt_parser = {
        'format': 'adt',
        'load': lambda s: eval(s, bap.bir.__dict__)  # pylint: disable=eval-used
    }

    if argv is None:
        argv = sys.argv
    toparse = argv[1]
    if not debugging:
        debugging = len(argv) > 3
        logger.debug("debugging = %s", debugging)

    # Stays None when the cached estr shortcut below skips both BAP runs.
    new_proj = None

    # NOTE(review): this reads 'estr.txt' from the current directory while the
    # debugging dump further down writes '/tmp/estr.txt' - confirm which
    # location is intended.
    if debugging and os.path.exists('estr.txt'):  # optional optimize
        logger.debug('loading estr.txt')
        with open('estr.txt') as fobj:
            estr = fobj.read()
    else:
        skipeval = len(argv) > 2
        if skipeval:
            logger.info("Calling bap.run(%r, parser=PASSTHRU)", toparse)
            projtxt = bap.run(toparse,
                              *extras,
                              parser={
                                  'format': 'adt',
                                  'load': lambda s: s
                              })
            if not isinstance(projtxt,
                              str):  # on python3 projtxt is bytes not str
                estr = projtxt.decode('utf-8')
            else:
                estr = str(projtxt)  # pylint: disable=redefined-variable-type
            # normalize white space in input
            estr = estr.replace("\n", "")
        else:
            logger.info("Calling bap.run(%r, parser=WITHEVAL)", toparse)
            origproj = bap.run(toparse, *extras, parser=witheval_adt_parser)

        # make sure to do this here not before calling bap the first time
        # Once this runs, if a lot of memory is used, Python can't create
        # child processes in all cases because os.fork() will fail under heavy
        # memory load
        logger.info("Calling bap.run(%r, parser=EVALFREE)", toparse)
        new_proj = bap.run(toparse, *extras, parser=EVALFREE_ADT_PARSER)

        if not skipeval:
            if origproj == new_proj:  # done!
                return
            estr = str(origproj)

    astr_paths = ('/tmp/astr0.txt', '/tmp/astr1.txt', '/tmp/astr2.txt')

    if debugging and all(os.path.exists(path) for path in astr_paths):
        # optionally optimize to test faster
        cached = []
        for path in astr_paths:
            logger.debug('loading %s', os.path.basename(path))
            with open(path) as fobj:
                cached.append(fobj.read())
        astr0, astr1, astr2 = cached
    else:  # normal test path
        if new_proj is None:  # since we may have optimized it out
            logger.info("Calling bap.run(%r, parser=EVALFREE)", toparse)
            # extras are forwarded here too, matching the other bap.run calls
            new_proj = bap.run(toparse, *extras, parser=EVALFREE_ADT_PARSER)

        astr0, astr1, astr2 = get_proj_strs(new_proj)

    if debugging:  # save for manual inspection
        # each string goes to its own file (astr0.txt used to receive astr1)
        dumps = zip(astr_paths + ('/tmp/estr.txt',),
                    (astr0, astr1, astr2, estr))
        for path, text in dumps:
            with open(path, 'w') as fobj:
                fobj.write(text)

    _compare_proj_str(estr, (astr0, astr1, astr2))
def test(src, dst):
    """Return verify(program, src, dst) for the program BAP lifts from '../echo'."""
    program = bap.run('../echo').program
    return verify(program, src, dst)
Пример #11
0
def test(src, dst):
    """Lift '../echo' with BAP and hand its program, src and dst to verify()."""
    # The binary path is fixed; only the two function names vary per call.
    return verify(bap.run('../echo').program, src, dst)
Пример #12
0
    def process(command):
        """Process one taint-analysis command (a string starting with "T").

        Returns 0 without doing anything when `command` does not start with
        "T", and 1 once a "T" command has been handled.  An unrecognised
        "T" command prints the help text.  Every handled command except "Ti"
        is followed by rehighlight().

        Taint bookkeeping lives in the enclosing scope's `taints` mapping
        (key -> list of address strings); `r` is the object whose cmd()
        method executes commands (presumably an r2pipe session - confirm
        against the enclosing scope).
        """
        global session
        global initialized

        cmd = command.split(" ")

        if len(cmd) < 2:
            # Guarantee cmd[1] exists; commands without an argument get None.
            cmd.append(None)

        if not command.startswith("T"):
            return 0

        def show(entry):
            # Print one tainted address followed by the output of "CC." there.
            print(
                colored(entry, "green") + ": " +
                r.cmd("CC. @ " + entry).strip("\n"))

        def clear(key):
            # Issue "CC-" and "ecH-" for every address under key, then empty it.
            for entry in taints[key]:
                r.cmd("CC- @ " + entry)
                r.cmd("ecH- @ " + entry)
            taints[key] = []

        def list_matching(wanted):
            # Best-effort listing: an error stops the listing quietly, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            try:
                for key in taints:
                    if wanted(key):
                        for entry in taints[key]:
                            show(entry)
            except Exception:
                pass

        def clear_matching(wanted):
            # Clear every key accepted by the predicate.
            for key in taints:
                if wanted(key):
                    clear(key)

        def current_seek():
            # Output of the "s" command, used as the register-taint key.
            return r.cmd("s").strip("\n")

        name = cmd[0]

        if name == "Ti":
            import bap
            proj = bap.run(binary)
            print(proj.program)
            # Returns straight away: no rehighlight() for this command.
            return 1
        elif name == "Th":
            taint_highlight()
        elif name == "Tp":
            taint_pointer(cmd[1])
        elif name == "Tpc":
            taint_pointer_call(cmd[1])
        elif name == "Tpl":
            list_matching(lambda key: "p" in key)
        elif name == "Tp-":
            # Pointer taints are keyed by the current seek followed by "p".
            clear(current_seek() + "p")
        elif name == "Tr":
            taint_register(cmd[1])
        elif name == "Trc":
            taint_register_call(cmd[1])
        elif name == "Trl":
            list_matching(lambda key: "p" not in key and "malloc" not in key)
        elif name == "Tr-":
            clear(current_seek())
        elif name == "Tr--":
            clear_matching(lambda key: key != "malloc" and "p" not in key)
        elif name == "Tp--":
            clear_matching(lambda key: "p" in key)
        elif name == "Tm":
            taint_malloc(cmd[1])
        elif name == "Tmc":
            taint_malloc_call(cmd[1])
        elif name == "Tml":
            # Unlike the other listings this one is not best-effort.
            for entry in taints["malloc"]:
                show(entry)
        elif name == "Tm-":
            clear("malloc")
        elif name == "T-":
            clear_matching(lambda key: True)
        elif name == "Tl":
            list_matching(lambda key: True)
        else:
            print(colored("Taint analysis commands using BAP", "yellow"))

            # Which help section to expand: 0 = overview, 1 = malloc,
            # 2 = register, 3 = pointer.
            temp = 0
            if "Tm" in name:
                temp = 1
            elif "Tr" in name:
                temp = 2
            elif "Tp" in name:
                temp = 3

            if temp == 0 or temp == 2:
                print("| Tr" + colored(
                    "[?]", "yellow"
                ) + "           " + colored(
                    "Propogate taint from register and mark tainted instructions",
                    "green"))
                if temp == 2:
                    print("| Trc             " + colored(
                        "Propogate taint from register and mark tainted calls",
                        "green"))
                    print("| Trl             " +
                          colored("List taints due to register", "green"))
                    print("| Tr-             " + colored(
                        "Remove taints due to register at current seek",
                        "green"))
                    print("| Tr--            " + colored(
                        "Remove all taints due to register sources", "green"))

            if temp == 0 or temp == 3:
                print("| Tp" + colored(
                    "[?]", "yellow"
                ) + "           " + colored(
                    "Propogate taint from pointer and mark tainted instructions",
                    "green"))
                if temp == 3:
                    print("| Tpc             " + colored(
                        "Propogate taint from pointer and mark tainted calls",
                        "green"))
                    # Tpl lists pointer taints; the help used to say "register".
                    print("| Tpl             " +
                          colored("List taints due to pointer", "green"))
                    print("| Tp-             " +
                          colored("Remove taints due to pointer", "green"))
                    print("| Tp--            " + colored(
                        "Remove all taints due to pointer sources", "green"))

            if temp == 0 or temp == 1:
                print("| Tm" + colored(
                    "[?]", "yellow"
                ) + "           " + colored(
                    "Propogate taint from mallocs and mark tainted instructions",
                    "green"))
                if temp == 1:
                    print("| Tmc             " + colored(
                        "Propogate taint from mallocs and mark tainted calls",
                        "green"))
                    print("| Tml             " +
                          colored("List taints from mallocs", "green"))
                    print("| Tm-             " +
                          colored("Remove taints due to mallocs", "green"))

            if temp == 0:
                print("| Tl              " +
                      colored("List all taint information", "green"))
                print("| T-              " +
                      colored("Remove all taint information", "green"))

        rehighlight()

        return 1