def __generate_facts(self) -> None:
    os.symlink(self.bytecode_path, join(self.work_dir, 'contract.hex'))
    with open(self.bytecode_path) as file:
        bytecode = ''.join([l.strip() for l in file if len(l.strip()) > 0])
    blocks = blockparse.EVMBytecodeParser(bytecode).parse()
    exporter.InstructionTsvExporter(blocks).export(output_dir=self.work_dir, bytecode_hex=bytecode)
def count_opcodes(bytecode: t.Union[str, bytes]) -> t.Tuple[collections.Counter, t.List]:
    """
    count_opcodes counts the number of each type of opcode in the given
    bytecode sequence, returning a dict-compatible collections.Counter of
    counts along with the parsed list of opcodes.
    """
    parser = blockparse.EVMBytecodeParser(bytecode)
    parser.parse()

    # convert EVMOps to OpCodes
    ops = list(map(lambda op: op.opcode, parser._ops))

    # use Python's Counter to count each opcode
    return collections.Counter(ops), ops
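# Example sketch (not part of the original module): how count_opcodes might be
# called on a short runtime bytecode. The hex string below is illustrative
# only; it encodes PUSH1 0x01, PUSH1 0x01, ADD.
def _example_count_opcodes() -> None:
    counts, ops = count_opcodes("0x6001600101")
    for opcode, n in counts.most_common():
        print(opcode, n)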
def analyze_contract(job_index: int, index: int, contract_filename: str, result_queue, timeout) -> None:
    """
    Perform dataflow analysis on a contract, storing the result in the queue.
    This is a worker function to be passed to a subprocess.

    Args:
        job_index: the job number for this invocation of analyze_contract
        index: the number of the particular contract being analyzed
        contract_filename: the absolute path of the contract bytecode file to process
        result_queue: a multiprocessing queue in which to store the analysis results
    """
    disassemble_start = time.time()

    def calc_timeout():
        timeout_left = timeout - time.time() + disassemble_start
        return max(timeout_left, args.minimum_client_time)

    def run_clients(souffle_clients, other_clients, in_dir, out_dir):
        errors = []
        timeouts = []
        for souffle_client in souffle_clients:
            if not args.interpreted:
                analysis_args = [
                    join(os.getcwd(), souffle_client + SOUFFLE_COMPILED_SUFFIX),
                    f"--facts={in_dir}",
                    f"--output={out_dir}"
                ]
            else:
                analysis_args = [
                    DEFAULT_SOUFFLE_BIN,
                    join(os.getcwd(), souffle_client),
                    f"--fact-dir={in_dir}",
                    f"--output-dir={out_dir}"
                ]
            if run_process(analysis_args, calc_timeout()) < 0:
                timeouts.append(souffle_client)

        for other_client in other_clients:
            other_client_split = [o for o in other_client.split(' ') if o]
            other_client_split[0] = join(os.getcwd(), other_client_split[0])
            other_client_name = other_client_split[0].split('/')[-1]
            err_filename = join(out_dir, other_client_name + '.err')

            runtime = run_process(other_client_split, calc_timeout(), devnull,
                                  open(err_filename, 'w'), cwd=in_dir)
            if len(open(err_filename).read()) > 0:
                errors.append(other_client_name)
            if runtime < 0:
                timeouts.append(other_client)
        return timeouts, errors

    try:
        # prepare working directory
        exists, work_dir, out_dir = prepare_working_dir(contract_filename)
        assert not (args.restart and exists)
        analytics = {}
        contract_name = os.path.split(contract_filename)[1]

        with open(contract_filename) as file:
            bytecode = file.read().strip()

        if exists:
            decomp_start = time.time()
            inline_start = time.time()
        else:
            # Disassemble contract
            blocks = blockparse.EVMBytecodeParser(bytecode).parse()
            exporter.InstructionTsvExporter(blocks).export(output_dir=work_dir, bytecode_hex=bytecode)

            os.symlink(join(work_dir, 'bytecode.hex'), join(out_dir, 'bytecode.hex'))

            if os.path.exists(join(work_dir, 'solidity_version.csv')):
                # Create a symlink with a name starting with 'Verbatim_' to be added to results json
                os.symlink(join(work_dir, 'solidity_version.csv'),
                           join(out_dir, 'Verbatim_solidity_version.csv'))

            run_clients(souffle_pre_clients, other_pre_clients, work_dir, work_dir)

            if args.context_depth is not None:
                context_depth_filename = os.path.join(work_dir, 'MaxContextDepth.csv')
                context_depth_file = open(context_depth_filename, "w")
                context_depth_file.write(f"{args.context_depth}\n")
                context_depth_file.close()

            # Run souffle on those relations
            decomp_start = time.time()
            run_clients([DEFAULT_DECOMPILER_DL], [], work_dir, out_dir)

            inline_start = time.time()
            if not args.disable_inline:
                run_clients([DEFAULT_INLINER_DL] * 4, [], out_dir, out_dir)

        # end decompilation
        if exists and not args.rerun_clients:
            return

        client_start = time.time()
        timeouts, errors = run_clients(souffle_clients, other_clients, out_dir, out_dir)

        # Collect the results and put them in the result queue
        files = []
        for fname in os.listdir(out_dir):
            fpath = join(out_dir, fname)
            if getsize(fpath) != 0:
                files.append(fname.split(".")[0])
        meta = []

        # Decompile + Analysis time
        analytics['disassemble_time'] = decomp_start - disassemble_start
        analytics['decomp_time'] = inline_start - decomp_start
        analytics['inline_time'] = client_start - inline_start
        analytics['client_time'] = time.time() - client_start
        analytics['errors'] = len(errors)
        analytics['bytecode_size'] = (len(bytecode) - 2) // 2

        log("{}: {:.36} completed in {:.2f} + {:.2f} + {:.2f} + {:.2f} secs".format(
            index, contract_name,
            analytics['disassemble_time'], analytics['decomp_time'],
            analytics['inline_time'], analytics['client_time']))

        if errors:
            log(f"Errors in: {', '.join(errors)}")
        if timeouts:
            log(f"Timeouts in: {', '.join(timeouts)}")

        get_gigahorse_analytics(out_dir, analytics)

        result_queue.put((contract_name, files, meta, analytics))

    except TimeoutException as e:
        result_queue.put((contract_name, [], ["TIMEOUT"], {}))
        log("{} timed out.".format(contract_name))
    except Exception as e:
        log(f"Error: {e}")
        result_queue.put((contract_name, [], ["error"], {}))
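# Minimal driver sketch (not part of the original script): analyze_contract is
# written as a multiprocessing worker, so a caller would typically hand it a
# queue and collect one result tuple per contract. The function name
# `run_jobs_sketch` and its parameters are hypothetical placeholders; the real
# scheduler in the surrounding tool may differ.
from multiprocessing import Process, SimpleQueue

def run_jobs_sketch(contract_filenames, timeout_secs):
    result_queue = SimpleQueue()
    workers = []
    for i, filename in enumerate(contract_filenames):
        p = Process(target=analyze_contract,
                    args=(0, i, filename, result_queue, timeout_secs))
        p.start()
        workers.append(p)
    for p in workers:
        p.join()
    # each entry is (contract_name, non-empty output relations, meta flags, analytics dict)
    return [result_queue.get() for _ in contract_filenames]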
# for multiple input files, comment above each output with the path of its file
if hasattr(args.infile, '__len__') and len(args.infile) > 1:
    fname_comment = "; Disassembly from\n; {}\n".format(infile.name)
    if args.prettify:
        fname_comment = colored(fname_comment, COMMENT_COL, attrs=['dark'])
    print(fname_comment, file=args.outfile)

# join the bytecode all into one string
bytecode = ''.join(l.strip() for l in infile if len(l.strip()) > 0)

# parse bytecode and create basic blocks
blocks = blockparse.EVMBytecodeParser(bytecode).parse()

# Print disassembly from each block
for b in blocks:
    for op in b.evm_ops:
        print(format_pc(op.pc),
              format_opcode(op.opcode),
              format_value(op.value),
              file=args.outfile)

    if args.prettify:
        print("", file=args.outfile)

# for multiple input files, separate output of each file with a newline
if hasattr(args.infile, '__len__') and i + 1 < len(args.infile):
    print("", file=args.outfile)
def analyze_contract(job_index: int, index: int, contract_filename: str, result_queue, timeout) -> None:
    """
    Perform dataflow analysis on a contract, storing the result in the queue.
    This is a worker function to be passed to a subprocess.

    Args:
        job_index: the job number for this invocation of analyze_contract
        index: the number of the particular contract being analyzed
        contract_filename: the absolute path of the contract bytecode file to process
        result_queue: a multiprocessing queue in which to store the analysis results
    """
    try:
        # prepare working directory
        exists, work_dir, out_dir = prepare_working_dir(contract_filename)
        assert not (args.restart and exists)
        analytics = {}
        contract_name = os.path.split(contract_filename)[1]
        disassemble_start = time.time()

        def calc_timeout():
            return timeout - time.time() + disassemble_start

        if exists:
            decomp_start = time.time()
        else:
            with open(contract_filename) as file:
                bytecode = file.read().strip()

            # Disassemble contract
            blocks = blockparse.EVMBytecodeParser(bytecode).parse()
            exporter.InstructionTsvExporter(blocks).export(output_dir=work_dir, bytecode_hex=bytecode)

            os.symlink(join(work_dir, 'bytecode.hex'), join(out_dir, 'bytecode.hex'))

            # Run souffle on those relations
            decomp_start = time.time()
            if not args.interpreted:
                analysis_args = [join(os.getcwd(), DEFAULT_SOUFFLE_EXECUTABLE),
                                 "--facts={}".format(work_dir),
                                 "--output={}".format(out_dir)]
            else:
                analysis_args = [DEFAULT_SOUFFLE_BIN,
                                 DEFAULT_DECOMPILER_DL,
                                 "--fact-dir={}".format(work_dir),
                                 "--output-dir={}".format(out_dir)]
            runtime = run_process(analysis_args, calc_timeout())
            if runtime < 0:
                result_queue.put((contract_filename, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_filename))
                return

        # end decompilation
        if exists and not args.rerun_clients:
            return

        client_start = time.time()
        for souffle_client in souffle_clients:
            if not args.interpreted:
                analysis_args = [join(os.getcwd(), souffle_client + '_compiled'),
                                 "--facts={}".format(out_dir),
                                 "--output={}".format(out_dir)]
            else:
                analysis_args = [DEFAULT_SOUFFLE_BIN,
                                 join(os.getcwd(), souffle_client),
                                 "--fact-dir={}".format(out_dir),
                                 "--output-dir={}".format(out_dir)]
            runtime = run_process(analysis_args, calc_timeout())
            if runtime < 0:
                result_queue.put((contract_name, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_name))
                return

        for python_client in python_clients:
            out_filename = join(out_dir, python_client.split('/')[-1] + '.out')
            err_filename = join(out_dir, python_client.split('/')[-1] + '.err')
            runtime = run_process([join(os.getcwd(), python_client)], calc_timeout(),
                                  open(out_filename, 'w'), open(err_filename, 'w'), cwd=out_dir)
            if runtime < 0:
                result_queue.put((contract_name, [], ["TIMEOUT"], {}))
                log("{} timed out.".format(contract_name))
                return

        # Collect the results and put them in the result queue
        files = []
        for fname in os.listdir(out_dir):
            fpath = join(out_dir, fname)
            if getsize(fpath) != 0:
                files.append(fname.split(".")[0])
        meta = []

        # Decompile + Analysis time
        analytics['disassemble_time'] = decomp_start - disassemble_start
        analytics['decomp_time'] = client_start - decomp_start
        analytics['client_time'] = time.time() - client_start
        log("{}: {:.36} completed in {:.2f} + {:.2f} + {:.2f} secs".format(
            index, contract_name,
            analytics['disassemble_time'], analytics['decomp_time'],
            analytics['client_time']))

        get_gigahorse_analytics(out_dir, analytics)

        result_queue.put((contract_name, files, meta, analytics))

    except Exception as e:
        log("Error: {}".format(e))
        result_queue.put((contract_name, [], ["error"], {}))