def _load_coverage_data(self, filename):
    utils.exit_unless_accessible(filename)
    self.df_cov = pd.read_csv(filename, sep=None, engine='python')
    self.df_cov.reset_index(drop=True, inplace=True)
    self.df_cov.columns = self.df_cov.columns.str.lower()
    require_cols = ['function', 'filename']
    if not all(x in list(self.df_cov.columns.values) for x in require_cols):
        _LOGGER.error(
            "Coverage file '%s' missing required headers: %s" %
            (filename, require_cols))
        exit(1)
    if self.df_cov.isnull().values.any():
        _LOGGER.error("Empty values in coverage data: %s" % filename)
        exit(1)
    # Normalize paths in the callgraph database.
    # This operation takes some time, so we don't do it in the suspected
    # usual case - when coverage data is not provided. However, when
    # coverage data _is_ provided, we need to normalize the filename
    # paths also in the callgraph database so that they become comparable
    # to the filename paths in the coverage data.
    self.df['caller_filename'] = self.df['caller_filename'].map(
        lambda a: a if pd.isnull(a) else os.path.normpath(a))
    self.df['callee_filename'] = self.df['callee_filename'].map(
        lambda a: a if pd.isnull(a) else os.path.normpath(a))
    # Normalize paths in the coverage data
    self.df_cov['filename'] = self.df_cov['filename'].map(
        lambda a: a if pd.isnull(a) else os.path.normpath(a))
    # Filename paths in the coverage data are expected to be relative to
    # the kernel tree directory, just like the filename paths in the
    # callgraph database. Take an example filename from the coverage data
    # to sanity-check that the two are comparable:
    example_cov_file = self.df_cov['filename'].iloc[0]
    # Warn if it looks like filepaths don't match between the
    # coverage data and the callgraph data.
    # Possible reasons include:
    # - Absolute vs relative filepaths
    # - Coverage data is from a different build than the callgraph data
    df = self.df[(self.df['caller_filename'] == example_cov_file)]
    if df.empty:
        _LOGGER.warning(
            "Filename '%s' from the coverage data is not in the "
            "callgraph database. File paths in coverage data will "
            "likely not match the file paths in callgraph database." %
            example_cov_file)
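
# Illustrative note (not part of the original module): os.path.normpath is
# what makes the paths above comparable; it collapses redundant separators
# and resolves up-level references. For example, on a POSIX system:
#
#   >>> os.path.normpath("./drivers//usb/../net/wireless.c")
#   'drivers/net/wireless.c'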
def _load_callgraph_data(self, filename):
    utils.exit_unless_accessible(filename)
    self.df = pd.read_csv(filename, na_values=[''], keep_default_na=False)
    self.df.reset_index(drop=True, inplace=True)
    self.df.columns = self.df.columns.str.lower()
    require_cols = [
        'caller_function', 'caller_filename', 'caller_def_line',
        'caller_line', 'callee_function', 'callee_filename', 'callee_line',
    ]
    if not all(x in list(self.df.columns.values) for x in require_cols):
        _LOGGER.error(
            "Callgraph database '%s' missing required headers: %s" %
            (filename, require_cols))
        exit(1)
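
# Sketch of a minimal callgraph database accepted by _load_callgraph_data()
# (derived from require_cols above; the header is lowercased on load, and
# the data row is made up for illustration):
#
#   caller_function,caller_filename,caller_def_line,caller_line,callee_function,callee_filename,callee_line
#   main,src/main.c,10,14,helper,src/util.c,3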
parser.add_argument("--out", help=help, default="chains.csv") choices = ["left", "right", "both"] help = "selects search direction." parser.add_argument("--direction", help=help, choices=choices, default="right") help = "select cutoff length for path search" parser.add_argument("--cutoff", help=help, type=int, default=10) help = "set the verbosity level (e.g. -vv for debug level)" parser.add_argument( "-v", "--verbose", help=help, action="count", default=1) return parser.parse_args() if __name__ == "__main__": args = getargs() utils.exit_unless_accessible(args.calls) utils.setup_logging(verbosity=args.verbose) # Load graph database (remove duplicates) df_all = df_from_csv_file(args.calls) df = df_all.drop_duplicates() from_fun, to_fun = args.from_function, args.to_function left, right = search_settings(args.direction, args.cutoff) merge_on = ["caller_filename", "caller_function", "callee_filename", "callee_function"] chains_df_right = pd.DataFrame(columns=merge_on) if right: chains_df_right = find_chains_directed_df(df, from_fun, to_fun, right) chains_df_left = pd.DataFrame(columns=merge_on)
"contains the COMPDB. To disable this default behaviour and work "\ "from the current working directory instead, enable this flag." parser.add_argument('--keepcwd', help=help, action='store_true') return parser.parse_args() ################################################################################ def compdb2bc(args): set_clang_bindings_and_lib(args.cindexpy, args.libclang) compiler = BitcodeCompiler( compdb=args.compdb, srcfile=args.file, append_arg=args.append_arg, clang=args.clang, keepcwd=args.keepcwd, ) compiler.generate_bitcode() if __name__ == "__main__": args = command_line_args(_FILEDIR) utils.setup_logging(verbosity=args.verbose) utils.exit_unless_accessible(args.compdb) utils.exit_unless_accessible(args.file) compdb2bc(args) ################################################################################
"--coverage coverage.input --out coverage.csv" % \ os.path.basename(__file__) parser = argparse.ArgumentParser(description=desc, epilog=epil) help = "Path to source root. Specify if you want paths in coverage data to be stored"\ " relative to the project root" parser.add_argument("--project_root", help=help, default="") help = "Format of the coverage input file" parser.add_argument("--format", help=help, required=True, choices=["syzkaller"]) help = "File with coverage data in specified format" parser.add_argument("--coverage", help=help, required=True) help = "Output file where data in CallGraph tool format will be exported" parser.add_argument('--out', help=help, required=True) help = "Set the verbosity level (e.g. -vv for debug level)" parser.add_argument( '-v', '--verbose', help=help, action='count', default=1) return parser.parse_args() if __name__ == '__main__': args = getargs() utils.exit_unless_accessible(args.coverage) utils.setup_logging(verbosity=args.verbose) if args.format == "syzkaller": df = df_from_csv_file(args.coverage) df = syzkaller_calculate_cov(df, args.project_root) df_to_csv_file(df, args.out)
help = "select cutoff length for path search" parser.add_argument("--cutoff", help=help, type=int, default=10) help = "set the verbosity level (e.g. -vv for debug level)" parser.add_argument("-v", "--verbose", help=help, action="count", default=1) return parser.parse_args() if __name__ == "__main__": args = getargs() for call in args.calls: utils.exit_unless_accessible(call) utils.setup_logging(verbosity=args.verbose) # Load graph database (remove duplicates) if args.algorithm == 'ancestor': df_all = df_from_csv_file(args.calls[0]) df = df_all.drop_duplicates() f1, f2 = args.function1, args.function2 lca = find_lca(df, f1, f2) lca_l = [] for node in lca: df_row = df_all[(df_all['caller_function'] == node.function) & (df_all['caller_filename'] == node.filename)] lca_entry = {
"or when the specified call chain depth is reached." parser.add_argument('--until_function', help=help) help = "Colorize functions that match the specified regular expression." parser.add_argument('--colorize', help=help) help = "Include function coverage data into the graph from the specified "\ "file." parser.add_argument('--coverage_file', help=help) help = "Set the verbose level (defaults to --v=1)" parser.add_argument('--verbose', help=help, type=int, default=1) return parser.parse_args() ################################################################################ if __name__ == "__main__": args = getargs() utils.exit_unless_accessible(args.csv) utils.setup_logging(verbosity=args.verbose) gradient_list_generate() _LOGGER.info("reading input csv") g = Grapher(args.csv) g.graph(args) ################################################################################