def advance_next_bp_candidate(self, bdepth=-1):
    while True:
        assert bdepth < 0 or self.callstack.depth() + 1 >= bdepth
        e = self.tq.lookahead(self.shift)
        self.shift += 1
        if e is None:
            return False
        if bdepth >= 0 and self.callstack.depth() + 1 == bdepth:
            # Last entry was a FUNC_RET: we reached the end of the function context
            debug(3, "[%d]Reached end of context, waiting at %08x", (self.id, e.ip))
            self.consume_entry(e)
            return False
        # We only merge at branch points (also call/ret) and branch targets; skip other entries
        if not Type.isbranch(e):
            continue
        assert self.callstack.depth() >= bdepth
        if bdepth < 0 or self.callstack.depth() == bdepth:
            # We are in the right context
            self.consume_entry(e)
            # Consume call/ret
            self.callstack.update_context(e)
            return True
        else:
            debug(3, "[%d]Ignoring %08x", (self.id, e.ip))
            self.callstack.update_context(e)

def rdc_sigthres_compute(N, Alpha):
    """
    Computes the significance threshold for the RDC.

    Keyword arguments:
    N     -- Number of measurement samples
    Alpha -- The required confidence level (0 < Alpha < 1)

    Returns:
    L -- Significance level
    """
    # Compute sigthres level by sampling the RDC null distribution
    l = 10000
    v = numpy.zeros(l, dtype=float)  # numpy.float was removed in NumPy 1.24; use plain float
    for i in range(0, l):
        a = numpy.random.normal(size=N)
        b = numpy.random.normal(size=N)
        R = None
        while R is None:
            debug(2, "rdc_limit computation for N=%d, alpha=%f, iteration %d/%d", (N, Alpha, i, l))
            # With max_iter=-1, R is always != None
            (R, _, _) = RDC.rdc(a, b, Alpha, SkipThres=True, max_iter=-1)
        v[i] = R
    (mu, std) = norm.fit(v)
    L = norm.isf(1.0 - Alpha, loc=mu, scale=std)
    L = numpy.min([L, 1.0])
    debug(1, "New rdc_limit: Alpha=%.6f, N=%d, L=%.6f", (Alpha, N, L))
    return L

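# Illustrative sketch (standalone, not part of the original module): the same
# Monte-Carlo thresholding idea as rdc_sigthres_compute, but with the absolute
# Pearson correlation standing in for RDC.rdc so the snippet runs on its own.
# Sample the statistic under the null hypothesis (independent inputs), fit a
# normal distribution, and take the Alpha-quantile as significance threshold.
import numpy
from scipy.stats import norm

def sigthres_sketch(N=100, Alpha=0.9999, trials=1000):
    v = numpy.zeros(trials, dtype=float)
    for i in range(trials):
        a = numpy.random.normal(size=N)
        b = numpy.random.normal(size=N)
        # Null-hypothesis statistic: correlation of two independent samples
        v[i] = abs(numpy.corrcoef(a, b)[0, 1])
    (mu, std) = norm.fit(v)
    # Any observed statistic above the returned value is significant at Alpha
    return min(norm.isf(1.0 - Alpha, loc=mu, scale=std), 1.0)

# Usage: print(sigthres_sketch())
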
def merge(self, newleak):
    assert self.ip == newleak.ip
    debug(1, "Merge Leaks into one.")
    for e in newleak.entries:
        self.entries.merge(newleak.entries[e])
    self.status.merge(newleak.status)
    self.evidence += newleak.evidence

def readelfsyms(fname, image):
    try:
        command = "objdump -f %s" % (fname)
        output = subprocess.check_output(command.split(' ')).decode('utf-8')
        image.dynamic = output.find("DYNAMIC") >= 0
        command = "nm -nS --defined-only %s" % (fname)
        output = subprocess.check_output(command.split(' ')).decode('utf-8')
        lines = output.splitlines()
    except Exception:
        debug(0, "Exception reading ELF symbols: %s", (sys.exc_info()))
        return None
    if lines is None or len(lines) == 0:
        return None
    syms = []
    for line in lines:
        values = line.split(' ')
        nval = len(values)
        idx = 1
        # Expect "addr [size] type name"; skip anything else
        if nval < 3 or nval > 4:
            continue
        saddr = int(values[0], 16)
        if nval == 4:
            ssize = int(values[1], 16)
            idx += 1
        else:
            ssize = 0
        stype = values[idx]
        sname = values[idx + 1]
        # For dynamic (position-independent) images, symbol addresses
        # are relative to the image's load address
        if image.dynamic:
            saddr += image.lower
        syms.append([saddr, ssize, sname, stype])
    return syms

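# Illustrative sketch (standalone): parsing lines in the "nm -nS --defined-only"
# format that readelfsyms expects, i.e. "addr [size] type name" with hex fields.
# Returns (addr, size, name, type) like the syms entries above, or None.
def parse_nm_line_sketch(line):
    values = line.split(' ')
    if len(values) == 3:   # "addr type name" (no size field)
        return (int(values[0], 16), 0, values[2], values[1])
    if len(values) == 4:   # "addr size type name"
        return (int(values[0], 16), int(values[1], 16), values[3], values[2])
    return None

# Example lines as produced by nm:
assert parse_nm_line_sketch("0000000000401130 0000000000000021 T main") == \
    (0x401130, 0x21, "main", "T")
assert parse_nm_line_sketch("0000000000401000 t _init") == \
    (0x401000, 0, "_init", "t")
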
def report_dataleak(callstack, e1, e2):
    debug(1, "Data leak@ %08x: %08x vs %08x", (e1.ip, e1.data, e2.data))
    if debuglevel(3):
        callstack.doprint_reverse()
    leak = DataLeak(e1.ip)
    leak.append(DataLeakEntry(e1.data))
    leak.append(DataLeakEntry(e2.data))
    leaks.report_leak(callstack, leak)

def lookahead(self, i):
    # Refill the queue until it holds at least i+1 entries
    while i >= self.q.qsize():
        if not self.refill():
            return None
    e = self.q.queue[i]
    if debuglevel(4):
        debug(4, str(e))
    return e

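# Illustrative sketch (standalone): the lookahead/refill pattern above, with a
# plain list standing in for the Queue and a chunked iterator standing in for
# the file-backed refill. Lookahead peeks ahead without consuming entries.
class LookaheadSketch:
    def __init__(self, chunks):
        self.chunks = iter(chunks)
        self.q = []

    def refill(self):
        try:
            self.q.extend(next(self.chunks))
            return True
        except StopIteration:
            return False

    def lookahead(self, i):
        while i >= len(self.q):
            if not self.refill():
                return None
        return self.q[i]

tq = LookaheadSketch([[1, 2], [3]])
assert tq.lookahead(2) == 3   # triggers two refills without consuming entries
assert tq.lookahead(5) is None
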
def append(self, leak):
    if isinstance(leak, DataLeak):
        self.dataleaks.merge(leak)
    elif isinstance(leak, CFLeak):
        self.cfleaks.merge(leak)
    else:
        debug(0, "Unknown type: " + str(leak.__class__))
        assert False

def fast_forward(queues, bp, bdepth):
    # Try a few times (at most four) to equalize the queue sizes.
    # The first try might not work since conditional branches are stored
    # as one element in the file but reported as two elements in the queue.
    for _ in range(1, 5):
        s0 = queues[0].size()
        s1 = queues[1].size()
        if s0 > s1:
            queues[1].refill(s0 - s1)
        elif s1 > s0:
            queues[0].refill(s1 - s0)
        else:
            break
    if queues[0].size() != queues[1].size():
        debug(2, "queue alignment error %d vs %d", (queues[0].size(), queues[1].size()))
        return [queues[0].get(), queues[1].get(), bp, bdepth]
    # Drain both queues in lockstep as long as the entries match
    while queues[0].size() > 0:
        e1 = queues[0].get()
        e2 = queues[1].get()
        if e1 is None:
            assert e2 is None
            break
        if e1 == e2:
            if Type.isbranch(e1):
                bp = e1
                bdepth = queues[0].callstack.depth()
            continue
        else:
            return [e1, e2, bp, bdepth]
    assert queues[1].size() == 0
    # Queues are empty, start fast forward
    debug(2, "fast forward")
    while True:
        if not queues[0].load_chunk():
            break
        if not queues[1].load_chunk():
            break
        if queues[0].chunk != queues[1].chunk:
            break
        newbp = queues[0].peak_last_branch_from_chunk()
        # It could happen by incredibly bad luck that the last chunk does not
        # contain a branch. In this case reuse the previous bp.
        if newbp is not None:
            bp = newbp
        consume_call_ret(queues)
    if queues[0].chunk is not None:
        queues[0].refill_chunk()
    if queues[1].chunk is not None:
        queues[1].refill_chunk()
    bdepth = queues[0].callstack.depth()
    e1 = queues[0].get()
    e2 = queues[1].get()
    return [e1, e2, bp, bdepth]

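# Illustrative sketch (standalone): the fast-forward idea above compares raw
# trace chunks of two executions and skips them while they are byte-identical,
# only falling back to entry-by-entry comparison once a chunk diverges. The
# byte-string traces here are a stand-in for the real chunked trace files.
def fast_forward_sketch(trace_a, trace_b, chunksize=4):
    pos = 0
    while pos + chunksize <= min(len(trace_a), len(trace_b)):
        if trace_a[pos:pos + chunksize] != trace_b[pos:pos + chunksize]:
            break  # divergence is inside this chunk; compare element-wise
        pos += chunksize
    while pos < min(len(trace_a), len(trace_b)) and trace_a[pos] == trace_b[pos]:
        pos += 1
    return pos  # index of the first differing entry

assert fast_forward_sketch(b"AAAABBBBCC", b"AAAABBBBCD") == 9
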
def loadpickle(pfile):
    debug(1, "Loading pickle file")
    try:
        with gzip.GzipFile(pfile, "rb", compresslevel=6) as f:
            unp = MyUnpickler(f, encoding="latin1")
            new = unp.load()
            return new
    except Exception as e:
        raise IOError("Error loading pickle file: %s" % str(e))

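# Illustrative sketch (standalone): the gzip + pickle persistence pattern used
# by loadpickle, with a plain pickle.Unpickler instead of MyUnpickler and a
# hypothetical savepickle_sketch counterpart to show the round trip.
import gzip
import pickle

def savepickle_sketch(pfile, obj):
    with gzip.GzipFile(pfile, "wb", compresslevel=6) as f:
        pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)

def loadpickle_sketch(pfile):
    with gzip.GzipFile(pfile, "rb") as f:
        return pickle.Unpickler(f, encoding="latin1").load()

# Round trip:
# savepickle_sketch("/tmp/leaks.pickle.gz", {"ip": 0x401130})
# assert loadpickle_sketch("/tmp/leaks.pickle.gz") == {"ip": 0x401130}
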
def report_cfleak(callstack, bp, mp, e1, len1, e2, len2):
    debug(1, "Control flow leak@BB %08x, merging@%08x(%s): %08x(%s)(+%d) vs %08x(%s)(+%d)",
          (bp, mp.ip, Type(mp.type).name, e1.ip, Type(e1.type).name, len1,
           e2.ip, Type(e2.type).name, len2))
    if debuglevel(3):
        callstack.doprint_reverse()
    leak = CFLeak(bp)
    leak.append(CFLeakEntry(e1, len1, mp.ip))
    leak.append(CFLeakEntry(e2, len2, mp.ip))
    leaks.report_leak(callstack, leak)

def loadpickle(pfile):
    debug(1, "Loading pickle file")
    try:
        with gzip.GzipFile(pfile, 'rb') as f:
            unp = MyUnpickler(f, encoding='latin1')
            new = unp.load()
            return new
    except Exception as e:
        raise IOError("Error loading pickle file: %s" % str(e))

def consume_leak(self, leak):
    debug(2, "consuming leak@ctxt %08x", (self.ctxt.callee))
    if isinstance(leak, DataLeak):
        self.dataleaks.merge(leak)
    elif isinstance(leak, CFLeak):
        self.cfleaks.merge(leak)
    else:
        debug(0, "Unknown type: " + str(leak.__class__))
        assert False

def lookup(cls, address):
    assert cls.instance is not None
    try:
        # Find the closest symbol at or below the given address
        (_, sym) = cls.instance.symbols.find_le(address)
        return sym
    except ValueError:
        return None
    except Exception as error:
        debug(0, f"lookup: {error} not caught!")
        return None

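# Illustrative sketch (standalone): the find_le lookup above resolves an
# address to the nearest preceding symbol. The same behavior can be written
# with the standard bisect module over a sorted list of (address, name) pairs:
import bisect

def find_le_sketch(symbols, address):
    # symbols must be sorted by address, ascending
    idx = bisect.bisect_right([a for (a, _) in symbols], address)
    if idx == 0:
        raise ValueError("no symbol at or below address")
    return symbols[idx - 1]

syms = [(0x401000, "_init"), (0x401130, "main"), (0x401200, "foo")]
assert find_le_sketch(syms, 0x401150) == (0x401130, "main")
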
def merge(self, newmap):
    if not isinstance(newmap, self.mytype):
        debug(0, newmap.__class__)
        debug(0, "Wrong class instance: %s vs %s", (str(newmap.__class__), str(self.mytype)))
        assert isinstance(newmap, self.mytype)
    if newmap not in self.mymap:
        self.mymap[newmap] = newmap
    else:
        self.mymap[newmap].merge(newmap)

def append(self, entry):
    if len(self.entries) == 0:
        debug(0, f"Empty Leak @{hex(self.ip)}")
    if isinstance(entry, DataLeakEntry):
        location = f"addr={hex(entry.addr)}"
    else:
        location = f"bp={hex(entry.bp.ip)}"
    debug(0, f"New entry for Leak @{hex(self.ip)}: {location} count={hex(entry.count)}")
    self.entries.merge(entry)

def get(self):
    while True:
        if self.q.empty():
            if not self.refill():
                return None
        e = self.q.get_nowait()
        self.callstack.update_context(e)
        # Heap alloc/free events only update the context; do not return them
        if Type(e.type) in (Type.HALLOC, Type.HFREE):
            continue
        if debuglevel(4):
            debug(4, str(e))
        return e

def export_ip(ip, datafs, imgmap, info_map):
    if ip is None or ip == 0:
        return
    if ip not in info_map:
        sym = SymbolInfo.lookup(ip)
        assert sym is not None
        if sym.img.dynamic:
            addr = ip - sym.img.lower
        else:
            addr = ip
        bin_file_path = sym.img.name
        asm_file_path = bin_file_path + ".asm"
        # Add binary (ELF) + ASM objdump to datafs
        if bin_file_path not in imgmap:
            datafs.add_file(bin_file_path)
            asm_dump = ""
            try:
                debug(1, "[ASM] objdump %s", (str(bin_file_path)))
                # asm_dump = subprocess.check_output(["objdump", "-Dj", ".text", bin_file_path], universal_newlines=True)
                with datafs.create_file(asm_file_path) as f:
                    subprocess.call(["objdump", "-d", bin_file_path],
                                    universal_newlines=True, stdout=f)
                    f.seek(0)
                    asm_dump = f.read().decode('utf-8')
            except subprocess.CalledProcessError as err:
                debug(0, "[ASM] objdump %s failed with error_code: %s",
                      (str(bin_file_path), str(err.returncode)))
                asm_dump = None
            imgmap[bin_file_path] = asm_dump
        if ip not in info_map:
            # Search for leak in asm dump
            asm_dump = imgmap[bin_file_path]
            asm_line_nr = getAsmFileInfo(addr, asm_dump)
            if asm_line_nr < 0:
                debug(1, "[ASM] unavailable for %s in %s", (hex(addr), bin_file_path))
            # Search for leak in source code
            src_file_path, src_line_nr = getSourceFileInfo(hex(addr), bin_file_path)
            if src_file_path is not None and os.path.exists(src_file_path):
                datafs.add_file(src_file_path)
            else:
                if src_file_path is None:
                    debug(1, "[SRC] unavailable for %s in %s", (hex(addr), bin_file_path))
                else:
                    debug(1, "[SRC] source file %s missing", (src_file_path))
            ip_info = IpInfoShort(asm_file_path, asm_line_nr, src_file_path, src_line_nr)
            info_map[ip] = ip_info

def __init__(self, tfile, tid, showprogress=False):
    self.id = tid
    self.file = tfile
    self.q = Queue()
    self.chunk = None
    self.callstack = CallStack(tid)
    self.fsize = os.path.getsize(self.file.name)
    debug(2, "[%d]file size is %d", (self.id, self.fsize))
    self.stepsize = self.fsize / 1000
    self.fpos = 0
    self.showprogress = showprogress
    # Create a virtual call to the entry
    einit = self.lookahead(0)
    assert einit.type == Type.FUNC_ENTRY.value

def collapse_leaks_recursive(leaks, collapsed, callstack, collapse_cfleaks, mask, filterarr):
    for l in leaks.dataleaks:
        if len(filterarr) > 0 and not match_filter(l, filterarr):
            debug(1, "Filtering dleak %x", (l.ip))
            continue
        n = l.clone_collapsed(mask)
        if len(n.entries) <= 1:
            debug(1, "Ignoring dleak %x", (n.ip))
        else:
            collapsed.report_leak(callstack, n)
    for l in leaks.cfleaks:
        if len(filterarr) > 0 and not match_filter(l, filterarr):
            debug(1, "Filtering cfleak %x", (l.ip))
            continue
        n = l.clone_collapsed(mask, collapse_cfleaks)
        if len(n.entries) <= 1:
            debug(1, "Ignoring cfleak %x", (n.ip))
        else:
            collapsed.report_leak(callstack, n)
    for k in leaks.children:
        child = leaks.children[k]
        callstack.docall_context(child.ctxt)
        collapse_leaks_recursive(child, collapsed, callstack, collapse_cfleaks, mask, filterarr)
        callstack.doreturn_context()
    return collapsed

def getSourceFileInfo(addr, binary_path):
    # e.g., addr2line 0x42d4b9 -e openssl
    # -> file_name:line_nr
    # from man pages:
    # if the filename cannot be determined -> print two question marks
    # if the line nr cannot be determined -> print 0
    try:
        output = subprocess.check_output(
            ["addr2line", addr, "-e", binary_path], universal_newlines=True
        )
        infos = output.split(":")
        source_file_path, source_line_number = infos[0], infos[1]
        if "??" == source_file_path:
            # CalledProcessError must be instantiated; raising the bare class fails
            raise subprocess.CalledProcessError(1, "addr2line")
    except subprocess.CalledProcessError:
        debug(2, "[SRC] unavailable for %s in %s", (addr, binary_path))
        return None, 0
    except Exception as error:
        debug(0, f"lookup: {error} not caught!")
        debug(2, "[SRC] unavailable for %s in %s", (addr, binary_path))
        return None, 0
    # Strip optional suffixes such as "(discriminator 2)"
    if "discriminator" in source_line_number:
        source_line_number = source_line_number.split()[0]
    try:
        source_line_number = int(source_line_number)
    except ValueError:
        source_line_number = 0
    except Exception as error:
        debug(0, f"lookup: {error} not caught!")
        source_line_number = 0
    return source_file_path, source_line_number

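# Illustrative sketch (standalone): parsing the "file:line" strings that
# addr2line prints, including the "??:0" unknown case and the optional
# "(discriminator N)" suffix handled above. Pure string handling, so this
# runs without an actual binary or the addr2line tool.
def parse_addr2line_sketch(output):
    source_file_path, source_line_number = output.strip().split(":", 1)
    if source_file_path == "??":
        return None, 0
    if "discriminator" in source_line_number:
        source_line_number = source_line_number.split()[0]
    try:
        return source_file_path, int(source_line_number)
    except ValueError:
        return source_file_path, 0

assert parse_addr2line_sketch("/src/aes.c:350 (discriminator 2)\n") == ("/src/aes.c", 350)
assert parse_addr2line_sketch("??:0\n") == (None, 0)
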
def doreturn(self):
    size = len(self.stack)
    assert size > 0
    ctxt = self.doreturn_context()
    size -= 1
    if size >= 1:
        debug(3, "[%d]Return from ctxt %08x to %08x",
              (self.id, ctxt.callee, self.stack[size - 1].callee))
    else:
        debug(0, "[%d]Return from ctxt %08x to nowhere", (self.id, ctxt.callee))
    if size < 0:
        size = 0

def find_class(self, module, name):
    result = None
    # These files have been moved into the 'datastub' package
    mapper = [
        "leaks",
        "IpInfoShort",
        "DataFS",
        "export",
        "printer",
        "SortedCollection",
        "SymbolInfo",
    ]
    if module in mapper:
        module = "datastub." + module
    try:
        result = super().find_class(module, name)
    except Exception as e:
        debug(0, "Error unpickling module %s, object %s" % (module, name))
        debug(1, "Exception: " + str(e))
        raise e
    return result

def merge(self, newmap):
    if isinstance(newmap, list):
        for item in newmap:
            self.merge(item)
        return
    if not isinstance(newmap, self.mytype):
        debug(0, newmap.__class__)
        debug(0, "Wrong class instance: %s vs %s", (str(newmap.__class__), str(self.mytype)))
        assert isinstance(newmap, self.mytype)
    if newmap not in self.mymap:
        self.mymap[newmap] = newmap
    else:
        self.mymap[newmap].merge(newmap)

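# Illustrative sketch (standalone): the keyed-merge pattern used by both merge
# variants above. Objects that compare equal (same key) are merged into the
# existing map entry instead of being inserted twice. CounterSketch and
# MapSketch are stand-ins for the real leak/map classes.
class CounterSketch:
    def __init__(self, key, count=1):
        self.key, self.count = key, count
    def __eq__(self, other):
        return self.key == other.key
    def __hash__(self):
        return hash(self.key)
    def merge(self, other):
        self.count += other.count

class MapSketch:
    def __init__(self):
        self.mymap = {}
    def merge(self, new):
        if new not in self.mymap:
            self.mymap[new] = new
        else:
            self.mymap[new].merge(new)

m = MapSketch()
m.merge(CounterSketch(0x401130))
m.merge(CounterSketch(0x401130))
assert m.mymap[CounterSketch(0x401130)].count == 2
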
def test(Inputs, Observations, Confidence, max_iter=100):
    """
    Checks for specific leakage between inputs and measured observations.
    The given input array contains the stimulations chosen by the user.
    The observation array contains the corresponding DBI measurements.

    Keyword arguments:
    Inputs       -- 1D-array of chosen input values (Numpy Int Array!)
    Observations -- 1D-array of measurement samples (Numpy Int Array!)
    Confidence   -- The required confidence level (0 ... 1)

    Returns:
    R -- Randomized dependence coefficient
    L -- Significance level RDC
    I -- Independence (True=independent, False=dependent, None=inconclusive)
    """
    # sanity check
    if len(Inputs) != len(Observations):
        raise Exception("Input and observation arrays must have same length!")
    # get sample variance
    ivar = numpy.var(Inputs)
    ovar = numpy.var(Observations)
    # constant input/observations
    if ivar == 0.0 or ovar == 0.0:
        debug(1, "Constant input/observations")
        return (None, None, None)
    # too few observations
    if len(Observations) < 30:
        debug(1, "Less than 30 observations")
        return (None, None, None)
    # varying input/observations
    (R, L, I) = RDC.rdc(Inputs, Observations, Confidence, SkipThres=False, max_iter=max_iter)
    return (R, L, I)

def advance(self, mp):
    debug(3, "[%d]advancing to %08x", (self.id, mp.ip))
    count = 0
    while True:
        # Make sure queue has enough items
        if self.q.empty():
            if not self.refill():
                assert False
                return -1
        e = self.q.queue[0]
        if self.callstack.depth() == mp.depth and e.ip == mp.ip and e.type == mp.type:
            debug(3, "[%d]advanced in %d steps", (self.id, count))
            return count
        # Skip item and advance
        e = self.q.get_nowait()
        self.callstack.update_context(e)
        count += 1

def getSourceFileInfo(addr, binary_path):
    # e.g., addr2line 0x42d4b9 -e openssl
    # -> file_name:line_nr
    # from man pages:
    # if the filename cannot be determined -> print two question marks
    # if the line nr cannot be determined -> print 0
    try:
        output = subprocess.check_output(
            ["addr2line", addr, "-e", binary_path], universal_newlines=True)
        infos = output.split(":")
        source_file_path, source_line_number = infos[0], infos[1]
        if "??" == source_file_path:
            # CalledProcessError must be instantiated; raising the bare class fails
            raise subprocess.CalledProcessError(1, "addr2line")
    except Exception:
        debug(2, "[SRC] unavailable for %s in %s", (addr, binary_path))
        return None, 0
    # Sometimes, the source line number is followed by additional text,
    # e.g. "/path/to/file.c:350 (discriminator 2)". Strip this away.
    source_line_number = source_line_number.split()[0]
    return source_file_path, int(source_line_number)

def refill_chunk(self):
    assert self.chunk is not None
    assert len(self.chunk) % bs == 0
    cblocks = int(len(self.chunk) / bs)
    # Each block is one packed trace entry: type (B), ip (Q), data (Q)
    unpacked = struct.unpack("<" + "BQQ" * cblocks, self.chunk)
    for i in range(0, cblocks):
        e = Entry(unpacked[i * 3:(i + 1) * 3])
        self.q.put_nowait(e)
        if debuglevel(4):
            debug(4, "parsing %s" % str(e))
        if Type.isbranch(e):
            if e.data != 0:
                # Report conditional branches/call/ret twice:
                # once as original branch/call/ret at the branch point
                # and once as BBL at the branch target
                e2 = Entry([Type.FUNC_BBL.value, e.data, 0])
                self.q.put_nowait(e2)
                if debuglevel(4):
                    debug(4, "Is branch, creating %s" % str(e2))
    self.chunk = None

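# Illustrative sketch (standalone): the "<BQQ" record layout unpacked above,
# i.e. little-endian (type: u8, ip: u64, data: u64) triples. The block size bs
# used by refill_chunk corresponds to struct.calcsize("<BQQ") = 17 bytes.
import struct

bs_sketch = struct.calcsize("<BQQ")   # 17 bytes per entry, no padding with "<"
chunk = struct.pack("<BQQ", 1, 0x401130, 0) + struct.pack("<BQQ", 2, 0x401150, 0x401200)
assert len(chunk) % bs_sketch == 0
cblocks = len(chunk) // bs_sketch
entries = [struct.unpack_from("<BQQ", chunk, i * bs_sketch) for i in range(cblocks)]
assert entries == [(1, 0x401130, 0), (2, 0x401150, 0x401200)]
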
def has_leak(self, callstack, leak):
    if callstack is None or len(callstack) == 0:
        if isinstance(leak, DataLeak):
            return leak in self.dataleaks
        elif isinstance(leak, CFLeak):
            return leak in self.cfleaks
        else:
            assert False
    else:
        # advance to correct calling context recursively
        # by consuming first callstack entry
        ctxt = callstack[0]
        assert isinstance(ctxt, Context)
        if debuglevel(3):
            debug(3, "Processing callstack:")
            for ci in callstack:
                debug(3, "%08x--%08x", (ci.caller, ci.callee))
        if ctxt in self.children:
            return self.children[ctxt].has_leak(callstack[1:], leak)
        # Unknown context: the leak cannot be present in this subtree
        return False

def collapse_cfleaks(leaks, collapse_cfleaks, granularity, resfilter=""):
    mask = -1
    filterarr = []
    if granularity != 1:
        granularity -= 1
        blen = granularity.bit_length()
        # granularity must be a power of 2
        assert 1 << blen == granularity + 1
        mask = -1 << blen
    if len(resfilter) > 0:
        filterarr = resfilter.replace('"', "").replace("'", "").split(';')
        for f in filterarr:
            debug(0, "Filtering results for: " + f)
    if mask == -1 and not collapse_cfleaks:
        # Nothing to collapse
        return leaks
    else:
        debug(1, "Collapsing")
        return collapse_leaks_recursive(leaks, CallHistory(), CallStack(),
                                        collapse_cfleaks, mask, filterarr)

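# Illustrative sketch (standalone): the power-of-two granularity mask computed
# above. Masking an address collapses all addresses within one granularity
# bucket (e.g., one 64-byte cache line) to the same value, so leaks at nearby
# addresses merge into a single entry.
def granularity_mask_sketch(granularity):
    blen = (granularity - 1).bit_length()
    assert 1 << blen == granularity, "granularity must be a power of 2"
    return -1 << blen

mask = granularity_mask_sketch(64)    # 64-byte cache-line granularity
assert 0x401137 & mask == 0x401100
assert 0x40113f & mask == 0x401100
assert 0x401140 & mask == 0x401140
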
def doprint_generic(self, obj, param1=None):
    if isinstance(obj, LeakCounter):
        pass
    elif isinstance(obj, CallHistory):
        if obj.ctxt is not None:
            self.doprint_line(Type.FUNC_ENTRY, obj.ctxt.caller, [obj.ctxt.callee])
        if len(obj.dataleaks) > 0:
            for leak in sorted_keys(obj.dataleaks):
                leak.doprint(self)
        if len(obj.cfleaks) > 0:
            for leak in sorted_keys(obj.cfleaks):
                leak.doprint(self)
        for k in sorted_keys(obj.children):
            obj.children[k].doprint(self, param1)
        if obj.ctxt is not None:
            self.doprint_line(Type.FUNC_EXIT)
    elif isinstance(obj, CFLeak):
        self.doprint_line(Type.CFLEAK, obj.ip, obj.get_mergepoint())
    elif isinstance(obj, DataLeak):
        self.doprint_line(Type.DLEAK, obj.ip)
    else:
        debug(0, "Unknown instance %s", (obj.__class__))
        assert False