def explain_llil(bv, llil_instruction):
    """Return the explanation string for one LLIL instruction.

    Looks up the instruction's operation name in the ``explanations`` table
    (loaded from explanations_en.json) and formats it with the preprocessed
    LLIL instruction.

    Returns None when no instruction is given; falls back to the raw
    operation name when no explanation exists or the format string is bad.
    """
    if llil_instruction is None:
        return None
    name = llil_instruction.operation.name
    if name in explanations:
        try:
            # Get the string from the JSON and format it
            return explanations[name].format(llil=preprocess(bv, llil_instruction))
        except AttributeError:
            # Usually a bad format string. Shouldn't show up unless something truly weird happens.
            log_error("Bad Format String in binja_explain_instruction")
            traceback.print_exc()
            return name
    # If there's anything in the LLIL that doesn't have an explanation, yell about it in the logs.
    # (Fixed typo in the log message: "doen't" -> "doesn't".)
    log_info("binja_explain_instruction doesn't understand " + name + " yet")
    return name
def init_index(self, bv, current_function):
    """Open (or create) the on-disk SimHash search index, after a platform check."""
    if not self.sim_hash_location:
        self.init_db()
    # Supported platform check
    if bv.platform.name not in supported_arch:
        bn.log_error('[!] Right now this plugin supports only the following architectures: ' + str(supported_arch))
        return -1
    # Only create a fresh index when no file exists at the target path yet.
    create_index = not os.path.isfile(self.sim_hash_location)
    return fss.SimHashSearchIndex(self.sim_hash_location, create_index, 50)
def init(self):
    """Set up the cLEMENCy view: platform, architecture, entry point, and one
    read/execute segment mapping the whole raw file at address 0.

    Returns True on success, False (after logging the traceback) on failure.
    """
    try:
        self.platform = Architecture['clemency'].standalone_platform
        self.arch = Architecture['clemency']
        self.add_entry_point(0x0)
        # Map the entire raw file at virtual address 0.
        self.add_auto_segment(
            0x0000000, 2 * 0x3FFFFFF, 0x0, len(self.raw),
            SegmentFlag.SegmentExecutable | SegmentFlag.SegmentReadable)
    except Exception:
        # Narrowed from a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit.
        log_error(traceback.format_exc())
        return False
    return True
def load_signatures(self, directories):
    """Compile every .yar/.yara file found in the given directories.

    Empty directory strings (e.g. an unset setting) are skipped silently;
    other invalid paths are logged. Compiled rules are appended to
    self.rules; rules with syntax errors are logged and skipped.
    """
    rule_files = []
    for directory in directories:
        if directory == "":
            # Bug fix: previously the empty string fell through to the else
            # branch and crashed on os.listdir("").
            continue
        if not os.path.isdir(directory):
            log_error(
                "YARA rule directory is invalid: {}".format(directory))
            continue
        for f in os.listdir(directory):
            if f.lower().endswith((".yar", ".yara")):
                # os.path.join instead of manual separator concatenation.
                rule_files.append(os.path.join(directory, f))

    for f in rule_files:
        try:
            self.rules.append(yara.compile(f))
            log_info("Loaded YARA rule: {}".format(f))
        except yara.SyntaxError:
            log_error("Syntax error compiling YARA rule: {}".format(f))
def find_hash(self, bv, current_function):
    """
    Find functions similar to the current one.

    Queries the SimHash index for the top 5 matches of the current function's
    flowgraph hash and shows a markdown report. Returns -1 on an unsupported
    platform.
    """
    if not self.sim_hash_location:
        self.init_db()
    # Supported platform check
    if bv.platform.name not in supported_arch:
        bn.log_error('[!] Right now this plugin supports only the following architectures: ' + str(supported_arch))
        return -1
    h1, h2 = self.extract_flowgraph_hash(current_function)
    create_index = not os.path.isfile(self.sim_hash_location)
    # NOTE(review): init_index() opens this index with 50, but 28 is used here
    # -- confirm which value is correct.
    search_index = fss.SimHashSearchIndex(self.sim_hash_location, create_index, 28)
    results = search_index.query_top_N(h1, h2, 5)
    # TODO: refactor, possibly with report template
    report = ""
    if len(results) == 0:
        report += "# No similar functions found"
    else:
        # TODO: add better header, but that will require some refactoring of extract function
        report += "# Best match results\n"
        for r in results:
            # Bug fix: `print r` was Python-2 syntax (a SyntaxError under Python 3).
            print(r)
            m = self.metadata.get(r[1], r[2])  # file name, function name
            # Map the raw hamming-distance score onto a [0, 1] similarity.
            similarity = max(float(r[0]) / 128.0 - 0.5, 0.0) * 2
            if len(m) == 0:
                line = "- {:f} - {:x}:0x{:x}".format(similarity, r[1], r[2])
            else:
                line = "- {:f} - {:x}:0x{:x} {} '{}'".format(similarity, r[1], r[2], m[0], m[1])
            report += line + "\n"
    # Display results
    bn.interaction.show_markdown_report('Function Similarity Search Report', report)
def run(self):
    """Prompt for a parameter name, then rename that argument at every caller in parallel."""
    target_name: str = get_text_line_input("enter name of parameter", "parameter name").decode('utf-8')
    candidates = (p for p in self.func.parameter_vars if p.name == target_name)
    try:
        param = next(candidates)
    except StopIteration:
        log_error(f'arg {target_name} not found')
        return
    param_index: int = self.func.parameter_vars.vars.index(param)
    # for commercial users
    with ThreadPoolExecutor(16) as pool:
        callers = self.func.callers
        log_info(f'processing {len(callers)} callers')
        # so i would filter for f.auto
        # but the flag is extremely unreliable
        for caller in callers:
            pool.submit(rename_caller, self.func, caller, param_index)
    log_info('renaming done')
def define_classes_plugin(view):
    """Define the Objective-C runtime types, then walk classes, protocols and categories."""
    log_debug("define_classes_plugin")
    define_types_plugin(view)

    # Reset the per-session lookup tables (one fresh dict each).
    for key in ('ClassList', 'ClassNames', 'ClassROList', 'Protocols'):
        view.session_data[key] = {}

    class_t = Type.named_type_from_type('class_t', view.types.get('class_t'))
    if class_t is None:
        log_error("class_t is not defined!")
        return

    _define_classes(view, class_t)
    _define_protocols(view)
    _define_categories(view)
def perform_get_instruction_low_level_il(self, data, addr, il):
    """Decode one instruction, append its LLIL lifting to `il`, and return its length."""
    (instr, width, src_operand, dst_operand, src, dst,
     length, src_value, dst_value) = self.decode_instruction(data, addr)
    if instr is None:
        return None

    lifter = InstructionIL.get(instr)
    if lifter is None:
        # No lifter for this mnemonic yet; emit unimplemented so analysis continues.
        log_error('[0x{:4x}]: {} not implemented'.format(addr, instr))
        il.append(il.unimplemented())
        return length

    lifted = lifter(il, src_operand, dst_operand, src, dst, width, src_value, dst_value)
    if isinstance(lifted, list):
        # A lifter may return several expressions; skip any None placeholders.
        for expr in lifted:
            if expr is not None:
                il.append(expr)
    elif lifted is not None:
        il.append(lifted)
    return length
def init(self):
    """Read the 16-bit program length from the parent view (offset 4) and map
    the program at 0x8000, defining `main` there as the entry point.

    Returns True on success, False (after logging the traceback) on failure.
    """
    try:
        hdr = self.parent_view.read(4, 2)
        self.binary_length = struct.unpack("<H", hdr)[0]

        # Add mapping for RAM and hardware registers, not backed by file contents
        self.add_auto_segment(
            0x8000, self.binary_length, 6, self.binary_length,
            SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable
            | SegmentFlag.SegmentExecutable)

        self.define_auto_symbol(
            Symbol(SymbolType.FunctionSymbol, 0x8000, "main"))
        self.add_entry_point(0x8000)
        return True
    except Exception:
        # Narrowed from a bare `except:`; also dropped the stray debug
        # print("ERROR!!!") and the duplicate traceback.print_exc() --
        # log_error(format_exc()) already records the full trace.
        log_error(traceback.format_exc())
        return False
def handle_dependencies(plugin):
    """Install the pip dependencies declared in a plugin's plugin.json manifest."""
    path = user_plugin_path.replace('plugins', 'repositories/default/plugins')
    plugin_json = '{}/{}/plugin.json'.format(path, plugin.path)
    try:
        with open(plugin_json, 'r') as jsonfile:
            raw_data = json.load(jsonfile)
        dependencies = raw_data["plugin"]["dependencies"]
        for package in dependencies.get("pip", []):
            print("Installing {} dependency: {}".format(
                plugin.name, package))
            try:
                pip.main(['install', '-q', package])
            except IOError:
                # Likely a permissions problem in site-packages.
                print("Unable to install {}. Permissions?".format(
                    package))
                traceback.print_exc()
    except IOError:
        log_error("Unable to install dependencies for {}. Permissions?".format(
            plugin.name))
        traceback.print_exc()
def run(self): log_info("Scanning binary view for matching YARA signatures") # TODO: Scan the raw binary data from the Raw view instead of by segment. # This would require mapping the addresses from the Raw view to the PE/ELF views. # raw = self.bv.get_view_of_type("Raw") # reader = BinaryReader(raw) # data = reader.read(raw.end) try: for idx, rule in enumerate(self.rules): if len(self.bv.segments) == 0: # Scan binary without segments self.scan(self.bv.start, self.bv.end, rule) else: # Scan by segment for segment in self.bv.segments: if self.cancelled: return self.scan(segment.start, segment.data_length, rule) self.progress = f"{self.progress_banner} matching on rules ({round((idx / len(self.rules)) * 100)}%)" except yara.TimeoutError: log_warn( "YARA scan exceeded timeout limit. Consider changing the timeout in settings." ) except yara.Error as err: log_error("Error matching on YARA rules: {}".format(str(err))) show_message_box("Error", "Check logs for details", icon=MessageBoxIcon.ErrorIcon) if 0 < len(self.results): if Settings().get_bool("yara.displayReport"): self.display_report() else: log_info("YARA scan finished with no matches.")
def update_registers(registers, derefs):
    """
    Updates the value and dereference string for each register in the
    OrderedDict passed in the registers parameter.
    """
    global main_window
    if main_window is None:
        return
    if not registers:
        log_alert(
            "Got a response from Voltron, but no registers. The process has probably exited."
        )
        return
    # Update registers in order, build an OrderedDict of derefs so the order
    # for those is preserved too.
    dereferences = OrderedDict()
    for reg in reglist:
        try:
            main_window.regwindow.update_single_register(reg, registers[reg])
            dereferences[reg] = derefs[reg]
        except KeyError:
            log_error("Voltron did not return a register called " + reg)
    main_window.regwindow.update_derefs(dereferences)
    main_window.regwindow.highlight_dirty()
def decode_instruction(self, data: bytes, addr: int):
    """
    Iterates through all the decoders that we have defined and attempts to
    decode the current data.

    If nothing returns, we have not implemented the instruction. If 2 or
    more return, then we have done something wrong, resulting in ambiguous
    behavior. If only one returns, we are good to go!

    Returns the single successful decode result, or None on failure.
    """
    decode_results = [
        r for r in (a.decode(data, addr) for a in self.instructions)
        if r is not None
    ]
    if len(decode_results) > 1:
        # Bug fix: previously this logged only the *last* attempted decode
        # (`decode_result`) instead of all the ambiguous candidates.
        log_error(f"Ambiguous decoding: {decode_results}")
        return None
    if not decode_results:
        log_error(
            f"No implementation found for instruction at {hex(addr)}")
        return None
    return decode_results[0]
def arch_explain_instruction(bv, instruction, lifted_il_instrs):
    """
    Returns the explanation string from explanations_en.json, formatted with the
    preprocessed instruction token list
    """
    if instruction is None:
        return False, []
    parsed = parse_instruction(bv, instruction, lifted_il_instrs)
    if not parsed:
        return False, []

    results = []
    supersedes = False
    for token in parsed:
        token = find_proper_name(token).lower()
        if token not in explanations:
            continue
        try:
            # Get the string from the JSON and format it
            supersedes = supersedes or token not in dont_supersede_llil
            results.append(explanations[token].format(
                instr=preprocess(bv, parsed, lifted_il_instrs, token)))
        except (AttributeError, KeyError):
            # Usually a bad format string. Shouldn't show up unless something truly weird happens.
            log_error("Bad Format String in binja_explain_instruction")
            traceback.print_exc()
            results.append(token)
    return supersedes, results
def lookup_hash(sig, use_cache=True):
    """Resolve a 4-byte selector hash to its text signature(s) via 4byte.directory.

    Checks the local cache first; on a miss queries the web API and caches any
    results. Returns a (possibly empty) list of text signatures. AssertionError
    is re-raised; any other failure is logged and degrades to an empty list.
    """
    if use_cache:
        init_cache()
    global _4byte_cache
    # Removed the commented-out duplicate of this cache check.
    cached = _4byte_cache.get(sig, [])
    if cached:
        return cached
    if not _requests_available:
        log_error("couldn't import requests for fetching from 4byte.directory")
        return []
    try:
        res = requests.get(LOOKUP_4BYTE_URL, params={"hex_signature": sig})
        rj = res.json()
        results = rj['results']
        if len(results) >= 1:
            sig_collisions = [r['text_signature'] for r in results]
            _4byte_cache[sig] = sig_collisions
            return sig_collisions
        log_warn(
            "4byte.directory didn't yield any results for '{}'".format(
                sig))
        return []
    except AssertionError:
        raise
    except Exception as e:
        # Best-effort: network/JSON failures are logged, not raised.
        # (The old unreachable trailing `return []` was removed.)
        log_error("4byte lookup failed, reason ({}): {}".format(type(e), e))
        return []
def run_fs_tests(_=None):
    """Run run_fs_test() on every .bin in the adjacent data/ directory and log the outcome."""
    bins_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "data")
    all_tests_succeeded = True
    for fname in os.listdir(bins_dir):
        if not fname.endswith(".bin"):
            continue
        fpath = os.path.join(bins_dir, fname)
        log_info("\n" + "=" * 40)
        log_info("Running test on binary %s" % (fpath))
        if run_fs_test(fpath):
            log_info(f"Test passed!")
        else:
            all_tests_succeeded = False
            log_error(f"Test failed!")
    if all_tests_succeeded:
        log_info(f"All tests passed!")
    else:
        log_error(f"Some tests failed!")
def init(self):
    # Microcorruption MSP430 loader: the reset vector at 0xfffe supplies the
    # entry point, which also serves as the boundary between the RW data
    # segment and the RX code segment.
    try:
        self.platform = Platform['microcorruption-msp430']
        self.arch = Architecture['msp430']
        # Entry address is the little-endian word at the MSP430 reset vector.
        self.entry_addr = struct.unpack('<H', self.raw.read(0xfffe, 2))[0]
        # [0, entry): readable/writable data.
        self.add_auto_segment(
            0, self.entry_addr,
            0, self.entry_addr,
            SegmentFlag.SegmentReadable | SegmentFlag.SegmentWritable
        )
        # [entry, 0x10000): readable/executable code.
        self.add_auto_segment(
            self.entry_addr, 0x10000 - self.entry_addr,
            self.entry_addr, 0x10000 - self.entry_addr,
            SegmentFlag.SegmentExecutable | SegmentFlag.SegmentReadable
        )
        self.add_entry_point(self.entry_addr)
        # Optionally load symbols from a sidecar "<binary>.sym" file of
        # "<hexaddr> <name>" lines, but only when no symbols exist yet.
        path = os.path.dirname(self.file.original_filename)
        filename = os.path.basename(self.file.original_filename)
        sym_path = os.path.join(path, os.path.splitext(filename)[0] + '.sym')
        if os.path.exists(sym_path) and not self.symbols:
            with open(sym_path, 'r') as f:
                for line in f:
                    addr, symbol = line.split(' ')[:2]
                    addr = int(addr, 16)
                    symbol = symbol.strip()
                    self.define_auto_symbol(Symbol(SymbolType.FunctionSymbol, addr, symbol))
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt --
        # consider narrowing to `except Exception`.
        log_error(traceback.format_exc())
        return False
    return True
def explain_instruction(bv, addr):
    """ Callback for the menu item that passes the information to the GUI """
    init_plugin(bv)

    # Get the relevant information for this address
    func = get_function_at(bv, addr)
    instruction = inst_in_func(func, addr)
    lifted_il_list = find_lifted_il(func, addr)
    llil_list = find_llil(func, addr)
    mlil_list = find_mlil(func, addr)

    # Typically, we use the Low Level IL for parsing instructions. However, sometimes there isn't a corresponding
    # LLIL instruction (like for cmp), so in cases like that, we use the lifted IL, which is closer to the raw assembly
    parse_il = fold_multi_il(bv, llil_list if len(llil_list) > 0 else lifted_il_list)

    # Give the architecture submodule a chance to supply an explanation for this instruction that takes precedence
    # over the one generated via the LLIL
    should_supersede_llil, explanation_list = architecture_specific_explanation_function(
        bv, instruction, lifted_il_list
    )

    # Display the raw instruction
    try:
        explain_window().instruction = f"{hex(addr).replace('L', '')}: {instruction}"
    except Exception:
        traceback.print_exc()

    if len(explanation_list) > 0:
        if should_supersede_llil:
            # If we got an architecture-specific explanation and it should supersede the LLIL, use that
            explain_window().description = [explanation for explanation in explanation_list]
        else:
            # Otherwise, just prepend the arch-specific explanation to the LLIL explanation
            explain_window().description = [explanation for explanation in explanation_list] + [
                explain_llil(bv, llil) for llil in parse_il
            ]
    else:
        # By default, we just use the LLIL explanation
        # We append the line number if we're displaying a conditional.
        explain_window().description = [explain_llil(bv, llil) for llil in parse_il]

    # Display the MLIL and LLIL, dereferencing anything that looks like a hex number into a symbol if possible
    explain_window().llil = [dereference_symbols(bv, llil) for llil in llil_list]
    explain_window().mlil = [dereference_symbols(bv, mlil) for mlil in mlil_list]

    # Pass in the flags, straight from the API. We don't do much with these, but they might make things more clear
    explain_window().flags = [
        (
            func.get_flags_read_by_lifted_il_instruction(lifted.instr_index),
            func.get_flags_written_by_lifted_il_instruction(lifted.instr_index),
            lifted,
        )
        for lifted in lifted_il_list
    ]

    # Display what information we can calculate about the program state before the instruction is executed
    try:
        explain_window().state = get_state(bv, addr)
    except AttributeError:
        # The architecture module doesn't implement state tracking; skip that pane.
        log_error("No instruction state support for this architecture")

    explain_window().show()
def find_format_strings(self, CHECK_WITH_READELF=False):
    # Worklist analysis: start from the known printf-like functions, check the
    # format argument at every call site, and when the format comes from a
    # caller's own parameter, enqueue that caller as a new printf-like function.
    # Results accumulate in self.results; discovered exported printf-likes are
    # saved to user_data at the end.
    visited = set()
    to_visit = []
    # ====================
    # Step 0: Get all hardcoded known printf_like functions
    to_visit = PrintfLikeFunction.load_all()
    # @@TODO: We could look for refs of strings with '%s', '%d'... and if they are the parameter of an external function, add those as 'printf like'
    while to_visit:
        printf_like_func = to_visit.pop(0)
        # Sometimes, due to saving printf_like_funcs in a file to later reload we get repeated entries
        if printf_like_func in visited:
            log_debug("Skipping analysis of duplicate printf_like_func ' %s '" % printf_like_func.name)
            continue
        visited.add(printf_like_func)
        syms = self.get_symbols_by_raw_name(printf_like_func.name)
        if not syms:
            if printf_like_func.name.startswith("sub_"):
                log_error(f"No symbol found for function '{printf_like_func.name}'")
            continue
        log_debug(f"\n===== {printf_like_func} =====")
        log_debug(f" syms: {syms} =====")
        # @@TODO: Add arg name 'format' and type 'char*' to the format var (Tried before but arg and var get disconnected sometimes. Likely a bug.)
        # Get every ref for this symbol(s)
        refs = []
        for sym in syms:
            it_refs = self.bv.get_code_refs(sym.address)
            # readelf check to get a second opinion
            if CHECK_WITH_READELF and sym.type == SymbolType.ExternalSymbol:
                self.check_relocations_with_readelf(sym, syms, it_refs)
            refs += it_refs
        # ====================
        # Step 1: Check each xref for vulns
        for ref in refs:
            log_debug(f"Analyzing xref {hex(ref.address)}")
            ref_result = FormatStringFinderResult(self.bv, ref)
            self.results.append(ref_result)
            # ====================
            # Step 1.0: Sanity checks
            mlil_instr = self.get_mlil_instr(ref.function, ref.address)
            if not mlil_instr:
                continue
            # Check for known unhandled operations
            if mlil_instr.operation in (
                MLILOperation.MLIL_CALL_UNTYPED, MLILOperation.MLIL_TAILCALL_UNTYPED
            ):
                log_debug("@@TODO: How to handle MLIL_CALL_UNTYPED and MLIL_TAILCALL_UNTYPED?")
                continue
            elif mlil_instr.operation in (MLILOperation.MLIL_SET_VAR, MLILOperation.MLIL_STORE):
                # Our xref is being used to set a var and not in a call.
                # @@TODO: Maybe we could try to find if it is called close by and use that as an xref
                continue
            # If it wasn't one of the above, it must be one of these
            if mlil_instr.operation not in (MLILOperation.MLIL_CALL, MLILOperation.MLIL_TAILCALL):
                assert False, f"mlil operation '{mlil_instr.operation.name}' is unsupported @ {hex(ref.address)}"
            # @@TODO: Can we force it to have the necessary arguments? Looking at the calling convention?
            if printf_like_func.parameter_index >= len(mlil_instr.params):
                log_error(
                    f"{hex(ref.address)} : parameter nr {printf_like_func.parameter_index} for "
                    f"function call of '{printf_like_func.name}' is not available"
                )
                continue
            if self.should_highlight:
                ref.function.set_user_instr_highlight(
                    ref.address, HighlightStandardColor.RedHighlightColor
                )
            # ====================
            # Step 1.1: Find the origins of the format parameter for this xref
            fmt_param = mlil_instr.ssa_form.params[printf_like_func.parameter_index]
            if fmt_param.operation in (MLILOperation.MLIL_CONST, MLILOperation.MLIL_CONST_PTR):
                # Handle immediate constants
                var_origins = [VarOriginConst(fmt_param.constant)]
            elif fmt_param.operation in (MLILOperation.MLIL_VAR_SSA, MLILOperation.MLIL_VAR_ALIASED):
                # @@TODO: What is the meaning of 'MLILOperation.MLIL_VAR_ALIASED' ?
                # Find the origins of the variable
                fmt_ssa = fmt_param.src
                mlil_ssa = ref.function.medium_level_il.ssa_form
                # Get the var origins. Can be a parameter, a const, an address of another var...
                var_origins = MLILSSAVarAnalysisOrigins(self.bv, mlil_ssa).run(fmt_ssa, self.should_highlight)
            else:
                assert False, f"ERROR: fmt_param.operation is {fmt_param.operation.name} @ {hex(ref.address)}"
            if var_origins is None:
                log_warn(f"{hex(ref.address)} : Failed to get origins of the format parameter")
                continue
            # ====================
            # Step 1.2: Determine if the origins are safe or vulnerable
            # Case 1: If any origin is an argument -> PRINTF_LIKE
            # Case 2: If any is NOT a read-only constant or a parameter -> VULN
            # Case 3: If all are an arg or a const -> SAFE
            vuln_origins = []
            safe_origins = []
            for orig in var_origins:
                if isinstance(orig, VarOriginParameter):
                    safe_origins.append(orig)
                    # Add as a printf like function
                    new_printf_like = PrintfLikeFunction(ref.function.name, orig.parameter_idx)  # pylint: disable=no-member
                    to_visit.append(new_printf_like)
                    self.new_printf_like_funcs.add(new_printf_like)
                    # Create a symbol for the new printf like function if it does not exist
                    # (the self-assignment forces Binary Ninja to materialize a symbol)
                    if not self.bv.get_symbols_by_name(ref.function.name):
                        ref.function.name = ref.function.name
                elif isinstance(orig, VarOriginConst) and self.is_addr_read_only(orig.const):
                    safe_origins.append(orig)
                elif isinstance(orig, VarOriginCallResult) and orig.func_name in self.safe_functions:  # pylint: disable=no-member
                    # We accept that 'dcgettext' is safe because you need root to control the translation
                    safe_origins.append(orig)
                else:
                    vuln_origins.append(orig)
            ref_result.set_result(safe_origins, vuln_origins)
            log_debug(str(ref_result))
            # ====================
            # Step 2: Heuristic to find function pointer calls that might me vulnerable
            self.heuristic_look_for_vul_function_ptr_calls(mlil_instr, var_origins)
    # ====================
    # Step 3: Save the exported functions to a file so other files that import them know they are printf like
    exported_printf_like_funcs = []
    for func in self.new_printf_like_funcs:
        syms = self.bv.get_symbols_by_name(func.name)
        if not syms:
            continue
        for s in syms:
            if s.type == SymbolType.FunctionSymbol and s.binding == SymbolBinding.GlobalBinding:
                log_info(f"Saving exported function '{func.name}' to user_data")
                exported_printf_like_funcs.append(func)
                break
    if exported_printf_like_funcs:
        fname = os.path.basename(self.bv.file.filename)
        PrintfLikeFunction.save_to_user_data(fname, exported_printf_like_funcs)
def lookup_one_inst(bv, address):
    """
    Given an address to a PUSH instruction, take the immediate value from the
    push instruction, mask it s.t. it's 4 byte, perform a lookup on
    4byte.directory

    Returns -1 when the instruction is not a usable PUSH, 0 otherwise.
    """
    init_cache()
    disas = bv.get_disassembly(address).strip()
    try:
        inst = disas.split(" ")[0]
        if not inst.startswith("PUSH"):
            log_error(
                "Instruction '{}' at address {} is not a PUSH inst".format(
                    inst, address))
            return -1
        if "#" not in disas:
            log_error("invalid PUSH immediate value")
            return -1
        # Immediate follows the '#' marker in the disassembly text.
        imm = int("0x" + disas.strip().split("#")[-1], 16)
        log_info("EVM: 4byte lookup of hash: {}".format(imm))
        # we mask the top bytes
        imm = imm & 0xffffffff
        sig = "0x{:0=8x}".format(imm)
        # hash_value = "#{:0=8x}".format(imm)
        sigs = lookup_hash(sig)
        log_debug("found {} sigs: {}".format(len(sigs), sigs))
        if len(sigs) == 0:
            return 0
        method_name, comment = format_comment(sigs)
        # reset_symbol(bv, imm, hash_value, method_name)
        if not comment:
            comment = "4byte signature: " + method_name
        if comment:
            # Append to (or create) a comment on every function containing this address.
            for func in bv.get_functions_containing(address):
                log_debug("in function {}".format(func))
                c = func.get_comment_at(address)
                if c:
                    log_debug("setting comment")
                    c = "{}\n---\n{}".format(c, comment)
                    func.set_comment_at(address, c)
                else:
                    func.set_comment_at(address, comment)
    except AssertionError:
        raise
    except Exception as e:
        # Best-effort: failures are logged, and the cache is still saved below.
        log_error(
            "4byte lookup failed for inst {} at address '{}' reason ({}): {}".
            format(disas, address, type(e), e))
    save_4byte_cache()
    return 0
def run(self, ssa_var: SSAVariable, should_highlight, visited=None):
    # Walk an MLIL SSA variable's def-use chain backwards and collect where its
    # value ultimately comes from: a function parameter, a constant, an
    # address-of, a load, a call result, or unknown. Phi nodes recurse; the
    # `visited` set breaks mutually-dependent phi cycles.
    origins = []
    if visited is None:
        visited = set()
    while True:
        if ssa_var in visited:
            # Seen a case where we had (in '/bin/dash'):
            # - r13_1#4 = ϕ(r13_1#3, r13_1#6)
            # - r13_1#6 = ϕ(r13_1#4, r13_1#5)
            msg = f"Found phi vars (including {ssa_var}) that depend on each other in function {ssa_var.var.function.start}. I've only seen this happen a couple of times."
            log_error(msg)
            origins.append(VarOriginUnknown(msg))
            return origins
        visited.add(ssa_var)
        # Step 1: If we reach an ssa_var with version 0, it will have no more definitions
        if ssa_var.version == 0:
            is_parameter, parameter_idx = self.is_ssa_var_a_parameter(
                ssa_var)
            if is_parameter:
                origins.append(VarOriginParameter(parameter_idx))
            else:
                # Var is version 0 but not a function parameter. Sometimes these are stack addrs.
                origins.append(
                    VarOriginUnknown(
                        "Var is version 0 but not a function parameter"))
            return origins
        # Step 2: Get the next definition
        var_def_instr: MediumLevelILInstruction = self.mlil_ssa_func.get_ssa_var_definition(
            ssa_var)
        if var_def_instr is None:
            msg = f"{ssa_var} has no definition in function {hex(ssa_var.var.function.start)} (Not sure how this is possible)"
            log_error(msg)
            origins.append(VarOriginUnknown(msg))
            return origins
        if should_highlight:
            self.func.set_user_instr_highlight(
                var_def_instr.address, HighlightStandardColor.OrangeHighlightColor)
        # log_info(str(var_def_instr.operation) + ": " + str(var_def_instr))
        # Step 3: Get the next var/vars to check
        if var_def_instr.operation in (MLILOperation.MLIL_SET_VAR_SSA, MLILOperation.MLIL_SET_VAR_ALIASED):
            src = var_def_instr.src
            if src.operation == MLILOperation.MLIL_VAR_SSA:
                # Keep propagating backwards
                ssa_var = src.src
                continue
            if src.operation in (MLILOperation.MLIL_CONST, MLILOperation.MLIL_CONST_PTR):
                # Found a constant
                origins.append(VarOriginConst(src.constant))
            elif src.operation == MLILOperation.MLIL_ADDRESS_OF:
                origins.append(VarOriginAddressOf(src.src))
            elif src.operation == MLILOperation.MLIL_LOAD_SSA:
                origins.append(VarOriginLoad())
            else:
                # We are NOT interested in things like adds/subs because we are looking for either arguments or constants
                msg = f"{src.operation.name} for a MLIL_SET_VAR_SSA src, so we stopped propagating the chain."
                origins.append(VarOriginUnknown(msg))
                log_warn(msg)
        elif var_def_instr.operation == MLILOperation.MLIL_VAR_PHI:
            # Find the origins of each PHI
            for phi_var in var_def_instr.src:
                origins += self.run(phi_var, should_highlight=should_highlight, visited=visited)
        elif var_def_instr.operation == MLILOperation.MLIL_CALL_SSA:
            # Found a var defined as the result of a function call
            func_addr = var_def_instr.dest.value.value
            func = self.bv.get_function_at(func_addr)
            if func is None:
                # A function call from an address that has no function?
                msg = f"Couldn't get function at {hex(func_addr)} (from MLIL_CALL_SSA at {var_def_instr.address})."
                origins.append(VarOriginUnknown(msg))
                log_error(msg)
            else:
                func_name = self.bv.get_function_at(
                    var_def_instr.dest.value.value).name
                origins.append(VarOriginCallResult(func_name))
        else:
            # What is this??
            msg = f"{var_def_instr.operation.name} not supported at {hex(var_def_instr.address)}"
            origins.append(VarOriginUnknown(msg))
            log_error(msg)
        return origins
minor = int(minor) buildid = int(buildid) if buildid is not None else 0xffffffff import json fpath = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'plugin.json') with open(fpath) as fp: data = json.load(fp) min_version = data['minimumbinaryninjaversion'] # git builds end with ' development' if not (core_version().endswith('development') or core_version().endswith('test')): if buildid < min_version: log_error( "Debugger relies on features and fixes present in Binary Ninja >= {}. Errors may follow, please update." .format(min_version)) except: pass if sys.version_info < (3, 6, 0): # Stop executing this file, we will just throw on later imports raise Exception("Debugger requires Python 3.6 or greater") if not standalone: from . import binjaplug """ Retrieve the debugger state instance for a given BinaryView """
def log_error(self, msg):
    """Emit an error to the Binary Ninja log, tagged with the plugin name."""
    tagged = '[CryptoScan] {message}'.format(message=msg)
    bn.log_error(tagged)
def check_arch(platform_name):
    """Return True when the platform is supported; otherwise log an error and return False."""
    if platform_name in supported_platforms:
        return True
    bn.log_error('[x] Right now this plugin supports only the following platforms: ' + str(supported_platforms.keys()))
    return False
def err(x):
    """Log a '[-] '-prefixed message at error level."""
    message = "[-] {:s}".format(x)
    log_error(message)
def dump_codecopy_data(bv, address):
    # Dump the data referenced by an EVM CODECOPY instruction to a file chosen
    # by the user. Returns the written path, or None when the instruction is
    # not a CODECOPY or the stack arguments can't be recovered.
    disas = bv.get_disassembly(address).strip()
    inst = disas.split(" ")[0]
    if not inst.startswith("CODECOPY"):
        log_error(
            "Instruction '{}' at address {} is not a CODECOPY inst".format(
                inst, address))
        return None
    for function in bv.get_functions_containing(address):
        sp = function.get_reg_value_at(address, 'sp')
        # sp should be a offset
        if hasattr(sp, 'offset'):
            spoff = sp.offset
        else:
            log_error(
                "binary ninja couldn't track the sp offset. Can't reliably determine stack arguments."
            )
            continue
        # stack pointer offsets (CODECOPY args: mem offset, code offset, length)
        # mem_offset = 0
        code_offset_offset = 1
        len_offset = 2
        # retrieve actual values
        code_offset = function.get_stack_contents_at(
            address, spoff + ADDR_SZ * code_offset_offset, ADDR_SZ)
        length = function.get_stack_contents_at(address,
                                                spoff + ADDR_SZ * len_offset,
                                                ADDR_SZ)
        # check if the values were actually resolved to constants
        if not hasattr(code_offset, 'value'):
            log_error("can't determine code_offset stack parameter (" +
                      repr(code_offset) + ")")
            continue
        if not hasattr(length, 'value'):
            log_error("can't determine len stack parameter (" + repr(length) +
                      ")")
            continue
        c, l = code_offset.value, length.value
        raw_data = bv.read(c, l)
        # Ask the user where to write the dump, defaulting next to the binary.
        dir_name = os.path.dirname(bv.file.filename)
        base_name = os.path.basename(bv.file.filename)
        default_filename = "{}_codecopy_{}_{}.raw".format(base_name, c, l)
        selected_filename = get_save_filename_input(
            "Select Filename?", "raw",
            os.path.join(dir_name, default_filename)).decode("utf-8")
        if not selected_filename:
            selected_filename = default_filename
        full_path = os.path.join(dir_name, selected_filename)
        log_info("writing contents to " + repr(full_path))
        with open(full_path, "wb") as f:
            f.write(raw_data)
        return full_path
    log_error("Couldn't find function to resolve stack slots!")
    return None
def load_analysis(bv):
    """Load a capa JSON report for this binary and annotate matched functions.

    Locates "<binary>.js"/"<binary>.json" next to the file (or prompts the
    user), verifies the report's sample MD5 against the loaded binary, then
    adds a "capa: ..." comment on each function-scope rule match.

    Returns 0 on a bad filename, -1 on a malformed report, -2 on hash mismatch.
    """
    shortname = os.path.splitext(os.path.basename(bv.file.filename))[0]
    dirname = os.path.dirname(bv.file.filename)
    binaryninja.log_info(f"dirname: {dirname}\nshortname: {shortname}\n")
    if os.access(os.path.join(dirname, shortname + ".js"), os.R_OK):
        path = os.path.join(dirname, shortname + ".js")
    elif os.access(os.path.join(dirname, shortname + ".json"), os.R_OK):
        path = os.path.join(dirname, shortname + ".json")
    else:
        path = binaryninja.interaction.get_open_filename_input(
            "capa report:", "JSON (*.js *.json);;All Files (*)")
    if not path or not os.access(path, os.R_OK):
        binaryninja.log_error("Invalid filename.")
        return 0
    binaryninja.log_info("Using capa file %s" % path)

    with open(path, "rb") as f:
        doc = json.loads(f.read().decode("utf-8"))

    if "meta" not in doc or "rules" not in doc:
        binaryninja.log_error("doesn't appear to be a capa report")
        return -1

    # Verify the report was produced from this exact sample (MD5 of the raw view).
    a = doc["meta"]["sample"]["md5"].lower()
    md5 = binaryninja.Transform["MD5"]
    rawhex = binaryninja.Transform["RawHex"]
    # Bug fix: BinaryView.read() takes (addr, length); previously the end
    # *address* was passed as the length, which reads the wrong span whenever
    # the parent view doesn't start at 0.
    raw_len = bv.parent_view.end - bv.parent_view.start
    b = rawhex.encode(
        md5.encode(
            bv.parent_view.read(bv.parent_view.start, raw_len))).decode("utf-8")
    if not a == b:
        binaryninja.log_error("sample mismatch")
        return -2

    rows = []
    for rule in doc["rules"].values():
        # Skip library rules, subscope matches, and anything not function-scoped.
        if rule["meta"].get("lib"):
            continue
        if rule["meta"].get("capa/subscope"):
            continue
        if rule["meta"]["scope"] != "function":
            continue
        name = rule["meta"]["name"]
        ns = rule["meta"].get("namespace", "")
        for va in rule["matches"].keys():
            rows.append((ns, name, int(va)))

    # order by (namespace, name) so that like things show up together
    rows = sorted(rows)

    for ns, name, va in rows:
        cmt = "%s (%s)" % (name, ns) if ns else name
        binaryninja.log_info("0x%x: %s" % (va, cmt))
        try:
            # message will look something like:
            #
            #     capa: delete service (host-interaction/service/delete)
            append_func_cmt(bv, va, "capa: " + cmt)
        except ValueError:
            # Address doesn't resolve to a function; skip it.
            continue
    binaryninja.log_info("ok")
def decode_instruction(self, data, addr):
    """Decode one MSP430 instruction starting at `addr`.

    Returns a 9-tuple (instr, width, src_operand, dst_operand, src, dst,
    length, src_value, dst_value); all-None when the bytes can't be decoded.
    """
    error_value = (None, None, None, None, None, None, None, None, None)
    if len(data) < 2:
        return error_value
    instruction = struct.unpack('<H', data[0:2])[0]

    # emulated instructions: 0x4130 is `mov @sp+, pc`, i.e. `ret`
    if instruction == 0x4130:
        return 'ret', None, None, None, None, None, 2, None, None

    opcode = (instruction & 0xf000) >> 12
    mask = InstructionMask.get(opcode)
    shift = InstructionMaskShift.get(opcode)
    if mask and shift:
        instr = InstructionNames[opcode][(instruction & mask) >> shift]
    else:
        instr = InstructionNames[opcode]
    if instr is None:
        log_error('[{:x}] Bad opcode: {:x}'.format(addr, opcode))
        return error_value

    # Byte/word flag (bit 6) is meaningful for everything except jumps.
    if instr not in TYPE3_INSTRUCTIONS:
        width = 1 if (instruction & 0x40) >> 6 else 2
    else:
        width = None

    src, src_operand, dst, dst_operand = GetOperands(instr, instruction)

    operand_length = 0
    if src_operand is not None:
        operand_length = OperandLengths[src_operand]
    if dst_operand is not None:
        operand_length += OperandLengths[dst_operand]
    length = 2 + operand_length
    if len(data) < length:
        return error_value

    src_value, dst_value = None, None
    if instr in TYPE3_INSTRUCTIONS:
        # Jumps carry a signed 10-bit word offset in the low bits.
        branch_target = (instruction & 0x3ff) << 1
        # Bug fix: after the <<1 shift the sign bit is 0x400, not 0x600.
        # The old `& 0x600` test wrongly treated positive offsets with bit 9
        # set (0x200-0x3fe) as negative.
        if branch_target & 0x400:
            branch_target |= 0xf800
            branch_target -= 0x10000
        src_value = addr + 2 + branch_target
    elif operand_length == 2:
        value = struct.unpack('<H', data[2:4])[0]
        if OperandLengths[src_operand]:
            src_value = value
        else:
            dst_value = value
    elif operand_length == 4:
        src_value, dst_value = struct.unpack('<HH', data[2:6])

    # `mov ..., pc` is really an indirect branch
    if instr == 'mov' and dst == 'pc':
        instr = 'br'

    return instr, width, src_operand, dst_operand, src, dst, length, src_value, dst_value
def get_index_value(self, table_type):
    """Return the decode field (op0/op1/op2/r/s/t/m/n) that indexes into
    ``table_type``, or None for an unknown table.

    Table objects are compared by identity (matching the original
    ``is``-chain), so the dispatch dict is keyed on ``id()`` — the table
    objects themselves need not be hashable.
    """
    from .xtensa_tables import (
        OPCODE_SPACE, QRST_TABLE, RST0_TABLE, ST0_TABLE, SNM0_TABLE,
        JR_TABLE, CALLX_TABLE, SYNC_TABLE, RFEI_TABLE, RFET_TABLE,
        ST1_TABLE, TLB_TABLE, RT0_TABLE, RST1_TABLE, ACCER_TABLE,
        IMP_TABLE, RFDX_TABLE, RST2_TABLE, RST3_TABLE, LSCX_TABLE,
        LSC4_TABLE, FP0_TABLE, FP1OP_TABLE, FP1_TABLE, LSAI_TABLE,
        CACHE_TABLE, DCE_TABLE, ICE_TABLE, LSCI_TABLE, MAC16_TABLE,
        MACID_TABLE, MACIA_TABLE, MACDD_TABLE, MACAD_TABLE, MACCD_TABLE,
        MACCA_TABLE, MACDA_TABLE, MACAA_TABLE, MACI_TABLE, MACC_TABLE,
        CALLN_TABLE, SI_TABLE, BZ_TABLE, BI0_TABLE, BI1_TABLE, B1_TABLE,
        B_TABLE, ST2_TABLE, ST3_TABLE, S3_TABLE)

    if table_type is None:
        return None

    # Attribute name of the index field, keyed by table identity.
    field_by_table = {
        id(OPCODE_SPACE): 'op0',
        id(QRST_TABLE): 'op1',
        id(RST0_TABLE): 'op2',
        id(ST0_TABLE): 'r',
        id(SNM0_TABLE): 'm',
        id(JR_TABLE): 'n',
        id(CALLX_TABLE): 'n',
        id(SYNC_TABLE): 't',
        id(RFEI_TABLE): 't',
        id(RFET_TABLE): 's',
        id(ST1_TABLE): 'r',
        id(TLB_TABLE): 'r',
        id(RT0_TABLE): 's',
        id(RST1_TABLE): 'op2',
        id(ACCER_TABLE): 'op2',
        id(IMP_TABLE): 'r',
        id(RFDX_TABLE): 't',
        id(RST2_TABLE): 'op2',
        id(RST3_TABLE): 'op2',
        id(LSCX_TABLE): 'op2',
        id(LSC4_TABLE): 'op2',
        id(FP0_TABLE): 'op2',
        id(FP1OP_TABLE): 't',
        id(FP1_TABLE): 'op2',
        id(LSAI_TABLE): 'r',
        id(CACHE_TABLE): 't',
        id(DCE_TABLE): 'op1',
        id(ICE_TABLE): 'op1',
        id(LSCI_TABLE): 'r',
        id(MAC16_TABLE): 'op2',
        id(MACID_TABLE): 'op1',
        id(MACIA_TABLE): 'op1',
        id(MACDD_TABLE): 'op1',
        id(MACAD_TABLE): 'op1',
        id(MACCD_TABLE): 'op1',
        id(MACCA_TABLE): 'op1',
        id(MACDA_TABLE): 'op1',
        id(MACAA_TABLE): 'op1',
        id(MACI_TABLE): 'op1',
        id(MACC_TABLE): 'op1',
        id(CALLN_TABLE): 'n',
        id(SI_TABLE): 'n',
        id(BZ_TABLE): 'm',
        id(BI0_TABLE): 'm',
        id(BI1_TABLE): 'm',
        id(B1_TABLE): 'r',
        id(B_TABLE): 'r',
        id(ST2_TABLE): 't',
        id(ST3_TABLE): 'r',
        id(S3_TABLE): 't',
    }

    field = field_by_table.get(id(table_type))
    if field is None:
        print(table_type)
        log_error("Fell off end of get_index_type lookup")
        return None
    return getattr(self, field)
def decode_instruction(self, data, addr):
    """Decode one big-endian 16-bit instruction word plus any trailing
    immediate.

    Args:
        data: raw bytes at ``addr``.
        addr: address of the instruction; used for branch targets and
            error logging.

    Returns:
        ``(instr, src, src_op, dst, dst_op, src_value, dst_value, length)``
        with ``instr`` left as ``None`` when the bytes cannot be decoded.
    """
    instr = None
    length = 2
    extra = None
    src_value, dst_value = None, None
    dst_op, src_op = DEFAULT_MODE, DEFAULT_MODE
    src, dst = None, None

    # Need at least the 16-bit opcode word.
    if len(data) < 2:
        return instr, src, src_op, dst, dst_op, src_value, dst_value, length

    word = struct.unpack('>H', data[:2])[0]

    # Top two bits select the instruction family.
    opcode_type = word >> 14

    if opcode_type == 0b11:  # branch
        branch_type = (word & 0x3c00) >> 10
        if branch_type < len(BRANCH_INSTRUCTIONS):
            instr = BRANCH_INSTRUCTIONS[branch_type]
        else:
            log_error('[%x] Bad branch opcode: %x' % (addr, branch_type))
            return instr, src, src_op, dst, dst_op, src_value, dst_value, length

        branch_offset = word & 0x3ff
        # NOTE(review): offset is treated as unsigned (forward-only);
        # confirm the ISA has no negative branch encodings.
        dst_value = (branch_offset << 1) + addr
        src_op = EMPTY_MODE
        dst_op = IMM_ADDRESS_MODE
    elif opcode_type == 0b10:
        instr = SPECIAL_INSTRUCTIONS[(word >> 12) & 0x3]

        # Register index lives in bits 8-11.  The original expression
        # `word & 0xf00 >> 8` shifted the mask first (`>>` binds tighter
        # than `&`), so it masked the immediate's low nibble instead.
        dst = (word & 0xf00) >> 8
        dst = REGISTERS[dst]
        dst_op = REGISTER_MODE
        src_value = word & 0xff
        src_op = IMM_INTEGER_MODE
    elif opcode_type == 0b00:
        opcode = word >> 8
        instr = INSTRUCTIONS[opcode]
        src, dst = self.get_operands(instr, word)

        # Pull in the trailing immediate, guarding against truncated data
        # (previously a short buffer raised struct.error here).
        if instr in IMM_INSTRUCTION_16:
            if len(data) < 4:
                return None, None, DEFAULT_MODE, None, DEFAULT_MODE, None, None, 2
            extra = struct.unpack('>H', data[2:4])[0]
            length += 2
        elif instr in IMM_INSTRUCTION_32:
            if len(data) < 6:
                return None, None, DEFAULT_MODE, None, DEFAULT_MODE, None, None, 2
            extra = struct.unpack('>I', data[2:6])[0]
            length += 4

        if instr in ONE_REG_INSTRUCTIONS:
            dst_op = REGISTER_MODE
            src_op = EMPTY_MODE
            # Compare against None, not truthiness: an immediate of 0 is
            # still an immediate (the old `if extra:` dropped it).
            if extra is not None:
                src_value = extra
                src_op = IMM_INTEGER_MODE
        elif instr in TWO_REG_INSTRUCTIONS:
            src_op = REGISTER_MODE
            dst_op = REGISTER_MODE
        elif instr in NO_IMM_INSTRUCTIONS and extra is not None:
            src = None
            dst = None
            src_op = EMPTY_MODE
            dst_op = IMM_INTEGER_MODE
            dst_value = extra
        else:
            src_op = EMPTY_MODE
            dst_op = EMPTY_MODE

    return instr, src, src_op, dst, dst_op, src_value, dst_value, length