def create_store_and_lib(libpath=TEST_LIBRARY, parse=False,
                         resolve_libs_recursive=False, call_resolve=False):
    """Build a fresh store and a Library object for a test fixture.

    :param libpath: path of the file to wrap; made absolute before use
    :param parse: forwarded to the ``Library`` constructor
    :param resolve_libs_recursive: if true, call
        ``store.resolve_libs_recursive`` on the new library
    :param call_resolve: if true, run ``resolve_calls`` on the store
    :returns: the tuple ``(store, lib)``
    """
    store = LibraryStore(ldconfig_file=LDCONFIG_FILE)
    absolute_path = os.path.abspath(libpath)
    lib = Library(absolute_path, parse=parse)

    if resolve_libs_recursive:
        store.resolve_libs_recursive(lib)

    if call_resolve:
        resolve_calls(store)

    return store, lib
def __init__(self):
    """Parse the command line, create an empty store and gather input paths.

    Files named by ``--entry-list`` and ``--used-functions`` are read line by
    line; every referenced path that is an existing regular file is
    registered as an additional entry point of the store and appended to the
    list of paths to process.
    """
    self._parse_arguments()
    self.store = LibraryStore()
    self.paths = self._get_paths()

    for list_file in (self.args.entry_list, self.args.used_functions):
        if not list_file:
            continue

        with open(list_file, 'r') as fdesc:
            # Only used_functions lines carry a ':'-separated suffix, but
            # taking the part before the first ':' is harmless for
            # entry_list lines too: without a ':' it is the whole line.
            candidates = (line.strip().split(':')[0] for line in fdesc)
            existing = {path for path in candidates if os.path.isfile(path)}

        entry_points = sorted(existing)
        self.store.set_additional_entry_points(entry_points)
        self.paths.extend(entry_points)

    self.all_resolved_functions = None
def test_6_propagate_calls_through_objects_32_bit(self):
    """Check user propagation for the 32-bit struct fixtures.

    The analysis runs with the environment variable
    ``ALL_FUNCTIONS_FROM_OBJECTS`` set to ``"1"``. Afterwards the struct
    binary must be listed as a user of both ``helper`` and ``from_obj`` in
    the struct library.
    """
    store = LibraryStore()

    env_key = 'ALL_FUNCTIONS_FROM_OBJECTS'
    previous = os.environ.get(env_key)
    os.environ[env_key] = "1"
    try:
        for elf in (TEST_STRUCTS32, TEST_LIBSTRUCT32):
            store.resolve_libs_recursive_by_path(os.path.abspath(elf))

        resolve_calls(store)
        store.resolve_all_functions()
        store.propagate_call_usage()
    finally:
        # Always put the environment back, even if the analysis raised, so
        # the setting cannot leak into other tests.
        if previous is None:
            del os.environ[env_key]
        else:
            os.environ[env_key] = previous

    library = store.get_from_path(os.path.abspath(TEST_LIBSTRUCT32))
    user = os.path.abspath(TEST_STRUCTS32)
    self.assertIn(user, store[library.fullname].get_users_by_name('helper'))
    self.assertIn(user, store[library.fullname].get_users_by_name('from_obj'))
def test_0_resolve_libs_with_symlinks(self):
    """Resolving a symlinked libc must yield four entries in the store.

    Looking the library up through the symlink path must return the same
    object as indexing the store with the link target's path.
    """
    store = LibraryStore(ldconfig_file=LDCONFIG_FILE)
    link_path = os.path.abspath(TEST_LIBC_LNK)
    store.resolve_libs_single_by_path(link_path)

    # Expected entries: libc.so.6 -> libc-2.23.so, libc-2.23.so itself, and
    # /lib64/ld-linux-x86-64.so.2 (from libc's .interp section) plus its
    # target.
    entry_count = len(store.items())
    self.assertEqual(entry_count, 4)

    via_link = store.get_from_path(os.path.abspath(TEST_LIBC_LNK))
    via_target = store[os.path.abspath(TEST_LIBC)]
    self.assertEqual(via_link, via_target)
def test_6_propagate_calls_loaderlike(self):
    """Check weak/strong symbol handling of the loader-like resolution.

    Three fixture binaries are resolved with ``LD_LIBRARY_PATH`` set to
    ``$ORIGIN/`` and functions are resolved via
    ``resolve_all_functions_from_binaries``. The assertions then check which
    objects are recorded as users of ``wfunc``, ``one_more`` and ``deeper``.
    """
    store = LibraryStore()
    bin1 = Library(os.path.abspath(TEST_LL))
    bin1a = Library(os.path.abspath(TEST_LL1A))
    bin2 = Library(os.path.abspath(TEST_LL2))

    # Save a possibly set LD_LIBRARY_PATH. None means "was not set", which
    # is different from "set to the empty string".
    backup = os.environ.get('LD_LIBRARY_PATH')
    # Set LD_LIBRARY_PATH and resolve libraries
    os.environ['LD_LIBRARY_PATH'] = '$ORIGIN/'
    try:
        for binary in (bin1, bin1a, bin2):
            store.resolve_libs_recursive(binary)

        resolve_calls(store)
        store.resolve_all_functions_from_binaries()
    finally:
        # Restore the previous state even if resolution raised
        if backup is not None:
            os.environ['LD_LIBRARY_PATH'] = backup
        else:
            del os.environ['LD_LIBRARY_PATH']

    lib1 = bin1.needed_libs['lib1.so']
    lib2 = store[lib1].needed_libs['lib2.so']

    # Check if the weak definition of 'wfunc' in 'lib2.so' is not used by
    # 'lib1.so' but overridden by the strong definition in 'bin'
    self.assertIn(lib1, store[bin1.fullname].get_users_by_name('wfunc'))
    self.assertNotIn(bin1.fullname, store[lib2].get_users_by_name('wfunc'))
    self.assertNotIn(lib1, store[lib2].get_users_by_name('wfunc'))

    # However, 'bin1a' and 'lib1a.so' should appear for 'wfunc' in 'lib2.so'
    # as bin1a does not override 'wfunc' itself
    lib1a = bin1a.needed_libs['lib1a.so']
    self.assertIn(bin1a.fullname, store[lib2].get_users_by_name('wfunc'))
    self.assertIn(lib1a, store[lib2].get_users_by_name('wfunc'))

    # Check that the weak definition in 'bin2' is only marked as external
    # but the use is actually overridden by the one in 'lib2.so'
    self.assertSetEqual(store[bin2.fullname].get_users_by_name('one_more'),
                        {'TOPLEVEL'})

    # Check if the deepest transitively called function has all binaries
    # as its users
    lib3 = store[lib2].needed_libs['lib3.so']
    for binary in (bin1, bin1a, bin2):
        self.assertIn(binary.fullname,
                      store[lib3].get_users_by_name('deeper'))
def test_7_store_load(self):
    """Dump a fully analysed store to disk and check that it reloads.

    After reloading into a new store, the keys, the ``needed_libs`` of the
    binary and the recorded users of three library functions must match what
    the analysis produced.
    """
    store, binary = create_store_and_lib(TEST_BINARY,
                                         resolve_libs_recursive=True,
                                         call_resolve=True)
    lib = Library(os.path.abspath(TEST_LIBRARY))

    store.resolve_all_functions(all_entries=True)
    store.propagate_call_usage(all_entries=True)

    # Create a temporary file and close it right away (we only need the path)
    fd, name = tempfile.mkstemp()
    os.close(fd)
    try:
        store.dump(name)
        # Reload into an empty store
        new_store = LibraryStore(ldconfig_file=LDCONFIG_FILE)
        new_store.load(name)
    finally:
        # The file is no longer needed; remove it even if dump/load failed
        os.remove(name)

    # Assert restoration of store
    self.assertEqual(store.keys(), new_store.keys())
    self.assertIn(lib.fullname, new_store.keys())
    self.assertIn(binary.fullname, new_store.keys())

    # Assert restoration of needed_libs
    self.assertIn(lib.fullname,
                  new_store[binary.fullname].needed_libs.values())

    lib = new_store[lib.fullname]
    # Assert restoration of calls
    self.assertIn(binary.fullname, lib.get_users_by_name('external'))
    self.assertIn(binary.fullname, lib.get_users_by_name('external_caller'))
    self.assertIn(binary.fullname,
                  lib.get_users_by_name('second_level_caller'))
dref['total/local functions before'] = csv['local functions before'].sum() dref['total/local functions after'] = csv['local functions after'].sum() dref['total/exported functions before'] = csv['exported functions before'].sum() dref['total/exported functions after'] = csv['exported functions after'].sum() dref['total/functions before'] = csv['exported functions before'].sum() + csv['local functions before'].sum() dref['total/functions after'] = csv['exported functions after'].sum() + csv['local functions after'].sum() dref['total/number of libraries'] = len(csv) else: # The following are for the kernel CSV files dref['total/number of files original'] = csv['number of files original'].sum() dref['total/number of files tailored'] = csv['number of files tailored'].sum() dref['total/number of features original'] = csv['number of features original'].sum() dref['total/number of features tailored'] = csv['number of features tailored'].sum() if store_path and mode == 'library': from librarytrader.librarystore import LibraryStore s = LibraryStore() s.load(store_path) non_libraries = 0 for l in s.get_library_objects(): if '.so' in l.fullname or os.path.basename(l.fullname).startswith('lib'): continue non_libraries += 1 dref['total/number of binaries'] = non_libraries dref.flush()
#!/usr/bin/env python3
# Write the NEEDED-library graph of a stored LibraryStore as a Graphviz file.
#
# Usage: <script> <librarystore>
# The output is written next to the input as '<librarystore>_dependencies.dot'.
# Nodes are labelled with the base name of each object's path.
import os
import sys

from librarytrader.librarystore import LibraryStore

s = LibraryStore()
s.load(sys.argv[1])

# Base names which already have a node declaration in the output, so that
# every node is declared only once no matter how often it is referenced.
seen = set()

with open(sys.argv[1] + '_dependencies.dot', 'w') as outfd:
    outfd.write('digraph D {\n')
    for l in s.get_library_objects():
        this_base = os.path.basename(l.fullname)
        if this_base not in seen:
            seen.add(this_base)
            outfd.write('"{}" [shape=box]\n'.format(this_base))

        for outgoing in l.needed_libs.values():
            outgoing_base = os.path.basename(outgoing)
            if outgoing_base not in seen:
                seen.add(outgoing_base)
                outfd.write('"{}" [shape=box]\n'.format(outgoing_base))
            # Edges are always written: only node declarations are deduplicated
            outfd.write('"{}" -> "{}"\n'.format(this_base, outgoing_base))
    outfd.write('}\n')
import collections import matplotlib.pyplot as plt import numpy import pandas import statistics import sys from librarytrader.librarystore import LibraryStore #if len(sys.argv) >= 4: print('Arguments: {} <librarystore> <output_filename> [cutoff_x]'.format( sys.argv[0])) # sys.exit(1) print(' * Loading LibraryStore...') store = LibraryStore() store.load(sys.argv[1]) print(' * ... done!') print(' * Collecting all non-zero ranges...') all_ranges = [ size for library in store.get_library_objects() for size in library.ranges.values() if size > 0 ] print(' * ... done!') df = pandas.DataFrame(all_ranges, columns=["Function Size"]) if len(sys.argv) >= 4: max_x = int(sys.argv[3]) else:
def maybe_print_node(library, addr, seen, outfd):
    """Write the node line for ``addr`` to ``outfd`` unless already written.

    :param library: library object handed through to ``format_node``
    :param addr: node identifier; used as the key in ``seen``
    :param seen: set of identifiers already written; updated in place
    :param outfd: writable text file object
    """
    if addr not in seen:
        seen.add(addr)
        outfd.write(format_node(library, addr) + '\n')


def print_edges(library, source, targets, seen, seen_edges, outfd):
    """Write ``source``, all ``targets`` and the edges between them.

    Nodes are emitted through ``maybe_print_node`` and every
    ``(source, target)`` pair is written at most once.

    :param library: library object handed through to the format helpers
    :param source: identifier of the edge origin
    :param targets: iterable of identifiers of the edge destinations
    :param seen: set of node identifiers already written; updated in place
    :param seen_edges: set of ``(source, target)`` pairs already written;
        updated in place
    :param outfd: writable text file object
    """
    maybe_print_node(library, source, seen, outfd)
    for target in targets:
        maybe_print_node(library, target, seen, outfd)
        if (source, target) not in seen_edges:
            seen_edges.add((source, target))
            # NOTE(review): unlike the node line, no '\n' is appended here;
            # presumably format_edge already ends its result with a newline
            # -- confirm against its definition.
            outfd.write(format_edge(library, source, target))


# Command line: <librarystore> <library key> <address> <depth>
s = LibraryStore()
s.load(sys.argv[1])
lname = sys.argv[2]
# int() without a base: the address has to be given in decimal notation
addr = int(sys.argv[3])
depth = int(sys.argv[4])
l = s[lname]
# Output file name: <library basename>_<hex address>_<depth>.dot
outname = os.path.basename(
    l.fullname) + '_' + hex(addr) + '_' + str(depth) + '.dot'
with open(outname, 'w') as outfd:
    print('writing to {}'.format(outname))
    # Bookkeeping sets for what has been written so far
    seen = set()
    seen_edges = set()
    seen_import = set()
def test_0_fail_on_elferror(self):
    """Resolving a non-ELF file must leave the store empty."""
    store = LibraryStore(ldconfig_file=LDCONFIG_FILE)

    # Makefile isn't an ELF file, so we fail in store._get_or_create_library
    non_elf_path = os.path.abspath(FILE_PATH + 'Makefile')
    store.resolve_libs_single_by_path(non_elf_path)

    entry_count = len(store.items())
    self.assertEqual(entry_count, 0)
#!/usr/bin/python3
# Recreate the files referenced by a stored LibraryStore below a directory.
#
# Usage: <script> <librarystore> <output directory>
# Store entries whose value is a string are recreated as relative symbolic
# links to that string's path; all other entries are copied from their
# original location (the store key).
import sys
import os
import shutil

from librarytrader.librarystore import LibraryStore

s = LibraryStore()
s.load(sys.argv[1])
outpath = sys.argv[2]

for key, value in s.items():
    full_outpath = os.path.join(outpath, key.lstrip('/'))
    os.makedirs(os.path.dirname(full_outpath), exist_ok=True)
    if isinstance(value, str):
        # Compute the link relative to the directory containing the link so
        # that it stays inside the output tree regardless of whether outpath
        # is absolute, relative or consists of several components.
        target_in_tree = os.path.join(outpath, value.lstrip('/'))
        link_target = os.path.relpath(target_in_tree,
                                      os.path.dirname(full_outpath))
        os.symlink(link_target, full_outpath)
    else:
        shutil.copy(key, full_outpath, follow_symlinks=False)
storepath = sys.argv[1] collectpath = sys.argv[2] uprobe_file_path = sys.argv[3] num_to_path = {} with open(uprobe_file_path, 'r') as infd: for line in infd: line = line.strip() name, path_and_offset = line.split(' ', 1) path, offset = path_and_offset.split(':') path = os.path.abspath(MOUNT_PREFIX + path) num_to_path[name[2:]] = (path, int(offset, 16)) store = LibraryStore() store.load(storepath) parsed_mapping = collections.defaultdict(set) matches_global = 0 matches_local = 0 traced_only_binaries = 0 traced_only_libraries = 0 histo_by_lib_global = collections.defaultdict(int) histo_by_lib_local = collections.defaultdict(int) with open(collectpath, 'r') as collectfd: for line in collectfd: line = line.strip() path, offset = num_to_path[line] lib = store.get(path)
class Runner():
    """Command-line driver around a ``LibraryStore``.

    The constructor parses ``sys.argv``; ``process`` then loads and/or builds
    the store and runs the analysis steps selected on the command line. The
    remaining public methods print statistics and optionally write them to
    CSV files whose names are prefixed with the ``--store`` argument.
    """

    def __init__(self):
        """Parse arguments, create an empty store and gather input paths.

        Files named by ``--entry-list`` and ``--used-functions`` are read
        line by line; every referenced path that is an existing regular file
        is registered as an additional entry point of the store and appended
        to the list of paths to process.
        """
        self._parse_arguments()
        self.store = LibraryStore()
        self.paths = self._get_paths()

        for arg in [self.args.entry_list, self.args.used_functions]:
            if not arg:
                continue
            entry_points = set()
            with open(arg, 'r') as fdesc:
                for line in fdesc:
                    # .split(':') is only required for used_functions but
                    # doesn't harm in entry_list as we need the first
                    # element anyway (which is the full match if ':' does
                    # not exist in the input line)
                    cur_lib = line.strip().split(':')[0]
                    if os.path.isfile(cur_lib):
                        entry_points.add(cur_lib)
            entry_points = sorted(entry_points)
            self.store.set_additional_entry_points(entry_points)
            self.paths.extend(entry_points)

        # Cache for get_all_resolved_functions(); None means "not computed"
        self.all_resolved_functions = None

    def _parse_arguments(self):
        """Parse the command line into ``self.args`` and set up logging.

        Exits with status 1 if neither ``--load`` nor any path was given.
        """
        parser = argparse.ArgumentParser(description='Evaluate imports and ' \
            'exports of .so libraries and ELF executables.')
        parser.add_argument('paths', type=str, nargs='*',
                            help='the paths to process')
        parser.add_argument('-v', '--verbose', action='store_true',
                            help='verbose output')
        parser.add_argument('--debug', action='store_true',
                            help=argparse.SUPPRESS)
        parser.add_argument('-l', '--load', action='store',
                            help='JSON file to load previously exported mapping')
        parser.add_argument('-s', '--store', action='store',
                            help='Store calculated mapping to JSON file')
        parser.add_argument('-r', '--resolve-functions', action='store_true',
                            help='Resolve imported functions to their origin')
        parser.add_argument('-i', '--interface_calls', action='store_true',
                            help='Calculate calls between interface functions')
        parser.add_argument('-t', '--transitive-users', action='store_true',
                            help='Propagate users over interface calls ' \
                            '(implies -r)')
        parser.add_argument('-a', '--all-entries', action='store_true',
                            help='Use all libraries as entry points for ' \
                            'function resolution. Default: only executables')
        parser.add_argument('-e', '--entry-list', action='store',
                            help='Use paths inside the given file as entry ' \
                            'points regardless of their executable status')
        parser.add_argument('-u', '--used-functions', action='store',
                            help='A file with path:name tuples which are ' \
                            'referenced symbols from dlsym')
        parser.add_argument('--single', action='store_true',
                            help='Do not recursively resolve libraries')
        parser.add_argument('--uprobe-strings', action='store_true',
                            help='Generate uprobe strings into a file')
        parser.add_argument('--loaderlike', action='store_true',
                            help='Resolve functions only from executables ' \
                            'while respecting weak symbols')
        parser.add_argument('--write-csvs', action='store_true',
                            help='write .csv files with statistics')
        parser.add_argument('--leave-undef-unused', action='store_true',
                            help='Keep targets for SHN_UNDEF functions unused '\
                            'during function resolution. This allows us to ' \
                            'later remove unused SHN_UNDEF entries and their '\
                            'counterparts.')
        self.args = parser.parse_args()

        loglevel = logging.WARNING
        if self.args.verbose:
            loglevel = logging.INFO
        if self.args.debug:
            loglevel = logging.DEBUG

        logging.basicConfig(format='%(asctime)s %(levelname)-7s %(message)s',
                            level=loglevel)

        # The store name doubles as prefix for all generated file names, so
        # make sure it is always a string.
        if not self.args.store:
            self.args.store = ""

        if not self.args.load and not self.args.paths:
            logging.error('Please load results and/or provide paths to analyze')
            parser.print_help()
            sys.exit(1)

    def _get_paths(self):
        """Return the list of paths to process.

        A directory argument is replaced by the absolute paths of the regular
        files directly inside it (no recursion); every other argument is
        passed through unchanged.
        """
        result = []
        for arg in self.args.paths:
            if os.path.isdir(arg):
                for entry in os.listdir(arg):
                    fullpath = os.path.join(os.path.abspath(arg), entry)
                    if os.path.isfile(fullpath):
                        result.append(fullpath)
            else:
                result.append(arg)
        return result

    def process(self):
        """Run all analysis steps selected on the command line."""
        if self.args.load:
            self.store.load(self.args.load)

        logging.info('Processing %d paths in total', len(self.paths))

        for path in self.paths:
            logging.info('Processing %s', path)
            if self.args.single:
                self.store.resolve_libs_single_by_path(path)
            else:
                self.store.resolve_libs_recursive_by_path(path)

        logging.info('Number of entries: %d', len(self.store))

        if self.args.interface_calls:
            self._process_interface_calls()

        if self.args.used_functions:
            self._mark_extra_functions_as_used()

        if self.args.resolve_functions:
            self.get_all_resolved_functions(self.args.leave_undef_unused)

        if self.args.transitive_users and not self.args.loaderlike:
            self._propagate_users_through_calls()

        if self.args.store:
            self.store.dump(self.args.store)

        if self.args.uprobe_strings:
            self.store.generate_uprobe_strings(
                '{}_uprobe_strings'.format(self.args.store))

    def _create_export_user_mapping(self):
        """Return ``{library path: {function: users}}`` for all entry points."""
        libobjs = self.store.get_entry_points(self.args.all_entries)
        return {lib.fullname: dict(lib.export_users.items())
                for lib in libobjs}

    def _process_interface_calls(self):
        """Run ``resolve_calls`` on the store."""
        resolve_calls(self.store)

    def get_all_resolved_functions(self, leave_undef_unused=False):
        """Resolve functions once and return the export/user mapping.

        The result is cached in ``self.all_resolved_functions``; later calls
        return the cached mapping and ignore ``leave_undef_unused``.

        :param leave_undef_unused: if true, resolution is run with
            ``force_add_to_exports=False``
        """
        if self.all_resolved_functions is None:
            # If the targets of SHN_UNDEF symbols should be marked as used
            # during function resolution, we force them to be added
            force_add = not leave_undef_unused
            if self.args.loaderlike:
                self.store.resolve_all_functions_from_binaries(
                    force_add_to_exports=force_add)
            else:
                self.store.resolve_all_functions(
                    self.args.all_entries, force_add_to_exports=force_add)
            self.all_resolved_functions = self._create_export_user_mapping()
        return self.all_resolved_functions

    @staticmethod
    def _parse_address(text):
        """Return the integer for a decimal or ``0x``-prefixed hexadecimal
        string, or None if ``text`` has neither form."""
        if text.isdigit():
            return int(text)
        if text.startswith('0x'):
            return int(text[2:], base=16)
        return None

    def _mark_extra_functions_as_used(self):
        """Mark the functions listed in ``--used-functions`` as used.

        Every line has the form ``path:function``. The line is split at the
        first ``:`` only, so function names may themselves contain colons.
        Blank lines are skipped, lines without a ``:`` are skipped with a
        warning. ``function`` is one of

        * ``LOCAL_<address>`` or ``LOCAL_<pattern>``,
        * ``GLOBAL_<address>``,
        * a plain pattern, looked up in both exported and local functions,

        where ``<address>`` is decimal or ``0x``-prefixed hexadecimal. Each
        address found gets ``'EXTERNAL'`` added as a user.
        """
        logging.info('Marking extra functions as used from \'%s\'',
                     self.args.used_functions)
        with open(self.args.used_functions, 'r') as infd:
            for line in infd:
                line = line.strip()
                if not line:
                    continue
                path, separator, function = line.partition(':')
                if not separator:
                    logging.warning('mark_extra: ignoring malformed line '
                                    '\'%s\'', line)
                    continue

                library = self.store.get_from_path(path)
                if not library:
                    continue

                addrs = set()
                if function.startswith('LOCAL_'):
                    name = function[6:]
                    addr = self._parse_address(name)
                    if addr is not None:
                        addrs.add(addr)
                    else:
                        addrs.update(library.find_local_functions(name))
                        logging.debug('_mark_extra_functions: found match for '\
                                      'local function pattern \'%s\' at %s',
                                      name, addrs)
                elif function.startswith('GLOBAL_'):
                    addr = self._parse_address(function[7:])
                    if addr is not None:
                        addrs.add(addr)
                else:
                    export = library.find_exports_by_pattern(function)
                    if export:
                        logging.debug('_mark_extra_functions: found global '\
                                      'addrs %s for %s', export, function)
                        addrs.update(export)
                    addrs.update(library.find_local_functions(function))

                if not addrs:
                    logging.warning('mark_extra: %s not found in %s', function,
                                    library.fullname)
                    continue

                for addr in addrs:
                    library.add_export_user(addr, 'EXTERNAL')

    def _propagate_users_through_calls(self):
        """Propagate users along calls and refresh the cached mapping."""
        self.get_all_resolved_functions(self.args.leave_undef_unused)
        self.store.propagate_call_usage(self.args.all_entries)
        self.all_resolved_functions = self._create_export_user_mapping()
        return self.all_resolved_functions

    def print_needed_paths(self):
        """Print the needed/imported libraries of the first library object
        and write a histogram of needed-library counts to
        ``<store>_needed_histo.csv``."""
        # Demonstration for needed paths resolution
        libobjs = self.store.get_library_objects()
        lib = next(iter(libobjs))

        print('= Needed libraries for {}'.format(lib.fullname))
        for name, path in lib.needed_libs.items():
            print('-- {} => {}'.format(name, path))

        print('= All imported libraries for {}'.format(lib.fullname))
        for name, path in lib.all_imported_libs.items():
            print('-- {} => {}'.format(name, path))

        histo = collections.defaultdict(int)
        for lib in libobjs:
            histo[len(lib.needed_libs)] += 1

        with open('{}_needed_histo.csv'.format(self.args.store), 'w') as outfd:
            for num, count in sorted(histo.items()):
                outfd.write('{},{}\n'.format(num, count))

    def resolve_and_print_one(self):
        """Resolve the functions of the first library object and print where
        each import was found."""
        # Demonstration for resolving
        libobjs = self.store.get_library_objects()
        lib = next(iter(libobjs))

        print('= Resolving functions in {}'.format(lib.fullname))
        self.store.resolve_functions(lib)
        for function, path in lib.imports.items():
            print("-- Found {} in {}".format(function, path))

    def _write_percent_histogram(self, suffix, histogram):
        """Write one ``percent,count,libraries`` line for each of 0..100 to
        ``<store>_<suffix>.csv``."""
        filename = '{}_{}.csv'.format(self.args.store, suffix)
        with open(filename, 'w') as outfd:
            for key in range(101):
                outfd.write('{},{},{}\n'.format(key, len(histogram[key]),
                                                histogram[key]))

    def count_and_print_resolved(self, do_print=True):
        """Print function users per library and build usage histograms.

        For every library whose path contains ``.so`` the percentage of used
        exported, local and total functions is computed. With
        ``--write-csvs`` the three histograms are written to CSV files.

        :param do_print: print the per-function overview
        """
        collection = self.get_all_resolved_functions()
        histo_percent = collections.defaultdict(list)
        histo_local = collections.defaultdict(list)
        histo_total = collections.defaultdict(list)

        if do_print:
            print('= Count of all external function uses:')

        for lib, functions in collection.items():
            if do_print:
                # Print sorted overview: most users first, then by key
                print('- Function uses in \'{}\''.format(lib))
                for function, importers in sorted(functions.items(),
                                                  key=lambda x: (-len(x[1]), x[0])):
                    print('-- {}: {}: {}'.format(function, len(importers),
                                                 importers))

            library = self.store[lib]
            is_shared_object = ".so" in lib
            # These libraries get an extra line of output regardless of
            # do_print
            is_highlighted = ('libc-2.2' in lib or 'libstdc++' in lib
                              or 'libgcj' in lib)
            used_exports = sum(1 for users in functions.values() if users)

            n_global = 0
            n_local = 0
            u_global = 0
            u_local = 0

            if library.exported_addrs and is_shared_object:
                n_global = len(library.exported_addrs)
                u_global = used_exports
                pctg = u_global / n_global
                ipctg = int(pctg * 100)
                if is_highlighted:
                    print(ipctg, pctg, used_exports, lib)
                histo_percent[ipctg].append(lib)

            if library.local_functions and is_shared_object:
                n_local = len(library.local_functions)
                u_local = sum(1 for users in library.local_users.values()
                              if users)
                pctg = u_local / n_local
                ipctg = int(pctg * 100)
                if is_highlighted:
                    # NOTE(review): the third value is the number of used
                    # *exported* functions, exactly as in the block above;
                    # confirm whether the local count was intended here.
                    print(ipctg, pctg, used_exports, lib)
                histo_local[ipctg].append(lib)

            if is_shared_object and (n_local + n_global) != 0:
                total_pctg = (u_local + u_global) / (n_local + n_global)
                total_ipctg = int(total_pctg * 100)
                histo_total[total_ipctg].append(lib)

        if not self.args.write_csvs:
            return

        self._write_percent_histogram('import_use_histo', histo_percent)
        self._write_percent_histogram('local_use_histo', histo_local)
        self._write_percent_histogram('total_use_histo', histo_total)

    def do_import_export_histograms(self):
        """Print top-10 lists for called functions, needed libraries,
        importers and exporters of all entry points; with ``--write-csvs``
        also write the full lists and the import/export histograms."""
        libobjs = self.store.get_entry_points(self.args.all_entries)

        histo_in = collections.defaultdict(int)
        histo_out = collections.defaultdict(int)
        for lib in libobjs:
            histo_in[len(lib.imports)] += 1
            histo_out[len(lib.exported_addrs)] += 1

        print('Most called functions (directly and transitively):')
        res = []
        for library in libobjs:
            for function, callers in library.export_users.items():
                name = '{}:{}'.format(library.fullname,
                                      library.exported_addrs[function])
                res.append((name, len(callers)))

        sorted_callees = sorted(res, key=lambda x: x[1], reverse=True)
        for name, count in sorted_callees[:10]:
            print('{}\t{}'.format(name, count))
        if self.args.write_csvs:
            with open('{}_called_functions.csv'.format(self.args.store), 'w') as outfd:
                for name, count in sorted_callees:
                    outfd.write('{},{}\n'.format(name, count))

        print('Top 10 NEEDED')
        sorted_needed = sorted(libobjs, key=lambda x: len(x.needed_libs),
                               reverse=True)
        for library in sorted_needed[:10]:
            print('{}: {}'.format(library.fullname, len(library.needed_libs)))
        if self.args.write_csvs:
            with open('{}_needed_libraries.csv'.format(self.args.store), 'w') as outfd:
                for library in sorted_needed:
                    outfd.write('{},{}\n'.format(library.fullname,
                                                 len(library.needed_libs)))

        print('Top 10 importers:')
        top_importers = sorted(libobjs, key=lambda x: len(x.imports),
                               reverse=True)
        for library in top_importers[:10]:
            print('{}: {}'.format(library.fullname, len(library.imports)))
        if self.args.write_csvs:
            with open('{}_number_of_imports.csv'.format(self.args.store), 'w') as outfd:
                for library in top_importers:
                    outfd.write('{},{}\n'.format(library.fullname,
                                                 len(library.imports)))
            with open('{}_imports_histo.csv'.format(self.args.store), 'w') as outfd:
                for key, value in sorted(histo_in.items()):
                    outfd.write('{},{}\n'.format(key, value))

        print('Top 10 exporters:')
        top_exporters = sorted(libobjs, key=lambda x: len(x.exported_addrs),
                               reverse=True)
        for library in top_exporters[:10]:
            print('{}: {}'.format(library.fullname,
                                  len(library.exported_addrs)))
        if self.args.write_csvs:
            with open('{}_number_of_exports.csv'.format(self.args.store), 'w') as outfd:
                for library in top_exporters:
                    outfd.write('{},{}\n'.format(library.fullname,
                                                 len(library.exported_addrs)))
            with open('{}_exports_histo.csv'.format(self.args.store), 'w') as outfd:
                for key, value in sorted(histo_out.items()):
                    outfd.write('{},{}\n'.format(key, value))

    def print_store_keys(self):
        """Print all keys of the store in sorted order, one per line."""
        for key, _ in sorted(self.store.items()):
            print(key)