def dump(self, dataset_path):
    """Write the dataset as an index file plus companion files.

    The features and data (and the constraints, when present) are written
    into the folder that contains ``dataset_path``. ``dataset_path`` itself
    receives a pickled dict mapping ``'feats_path'``, ``'data_path'`` and,
    optionally, ``'constr_path'`` to paths relative to that folder.

    Args:
        dataset_path: location of the index file. If it already exists a
            warning is logged and the file is overwritten.
    """
    if exists(dataset_path):
        logger.warning("File exists: {}".format(dataset_path))

    name = basename(dataset_path)
    target_dir = abspath(dirname(dataset_path))

    # Companion files are named after the index file via the class templates.
    feats_path = join(target_dir, Dataset.FEATS_TEMPL.format(name))
    data_path = join(target_dir, Dataset.DATA_TEMPL.format(name))
    constr_path = join(target_dir, Dataset.CONSTRAINTS_TEMPL.format(name))

    self.dump_feats(feats_path)
    self.dump_data(data_path)

    index = {}
    index['feats_path'] = relpath(feats_path, target_dir)
    index['data_path'] = relpath(data_path, target_dir)

    # The constraints file is only produced (and indexed) when there are any.
    if self.constraints is not None:
        write_smtlib(self.constraints, constr_path)
        index['constr_path'] = relpath(constr_path, target_dir)

    with open(dataset_path, 'wb') as index_file:
        pickle.dump(index, index_file)
def save_to(self, outfile):
    """
        Save the encoding into a file with a given name.

        A trailing ``.txt`` extension is replaced by ``.smt2``. After the
        encoding is written, the file is re-read and rewritten with a
        header of ``;`` comment lines listing the features, the number of
        classes and, when intervals are present, the interval/variable
        pairs of every extended feature.
    """
    if outfile.endswith('.txt'):
        outfile = outfile[:-3] + 'smt2'

    write_smtlib(self.enc, outfile)

    # Read back what was just written so the header can be prepended.
    with open(outfile, 'r') as fp:
        body = fp.readlines()

    header = [
        '; features: {0}\n'.format(', '.join(self.feats)),
        '; classes: {0}\n'.format(self.nofcl),
    ]

    if self.intvs:
        for feat in self.xgb.extended_feature_names_as_array_strings:
            prefix = '; i {0}: '.format(feat)
            pairs = ', '.join(
                '{0}<->{1}'.format(u, v)
                for u, v in zip(self.intvs[feat], self.ivars[feat]))
            header.append(prefix + pairs + '\n')

    with open(outfile, 'w') as fp:
        fp.writelines(header + body)
def dump(self, problem_path):
    """Write the problem index (and, for new problems, its parts) to disk.

    Args:
        problem_path: destination of the pickled index. When ``None`` the
            problem's ``original_path`` is used instead.

    Raises:
        IOError: if neither ``problem_path`` nor ``original_path`` is set,
            or if an explicitly given ``problem_path`` already exists.
    """
    if problem_path is None:
        if self.original_path is None:
            raise IOError("Unspecified path")
        msg = "Dumping the problem with no specified path, using {}"
        logger.debug(msg.format(self.original_path))
        problem_path = self.original_path
    elif exists(problem_path):
        raise IOError("File exists: {}".format(problem_path))

    problem_name = basename(problem_path)
    folder = abspath(dirname(problem_path))

    model_path = join(folder, Problem.MODEL_TEMPL.format(problem_name))
    # The model and datasets are only written when the problem has no
    # original path; the index refers to their paths either way.
    if self.original_path is None:
        self.model.dump(model_path)

    index = {
        'model_path': relpath(model_path, folder),
        'dataset_paths': {},
    }

    for dataset_name, dataset in self.datasets.items():
        dataset_path = join(
            folder, Problem.DATASET_TEMPL.format(problem_name, dataset_name))
        if self.original_path is None:
            dataset.dump(dataset_path)
        index['dataset_paths'][dataset_name] = relpath(dataset_path, folder)

    if len(self.learned_supports) > 0:
        support_paths = []
        index['support_paths'] = support_paths
        for i, chi in enumerate(self.learned_supports):
            support_path = join(
                folder, Problem.SUPPORT_TEMPL.format(problem_name, i))
            logger.debug("Writing support file: {}".format(support_path))
            write_smtlib(chi, support_path)
            support_paths.append(relpath(support_path, folder))

    # Optional entries are stored in the index itself, not in side files.
    if self.bounds is not None:
        index['bounds'] = self.bounds
    if self.metadata is not None:
        index['metadata'] = self.metadata

    with open(problem_path, 'wb') as index_file:
        pickle.dump(index, index_file)
def test_read_and_write_shortcuts(self):
    """Round-trip every example formula through write_smtlib/read_smtlib.

    Each formula is written to a temporary file, read back, and the
    simplified forms of the written and the read formula are compared.
    """
    fs = get_example_formulae()

    fdi, tmp_fname = mkstemp()
    os.close(fdi)  # Close initial file descriptor
    try:
        for (f_out, _, _, _) in fs:
            write_smtlib(f_out, tmp_fname)
            f_in = read_smtlib(tmp_fname)
            self.assertEqual(f_out.simplify(), f_in.simplify())
    finally:
        # Clean-up: runs even when an assertion or I/O call fails, so the
        # temporary file is never left behind.
        os.remove(tmp_fname)
def renorm_wrap(inst, support, support_path, weight_path):
    """Renormalize ``inst`` on ``support`` and write the results as SMT-LIB.

    After ``inst.renormalize(support)`` the support and weight function
    obtained from ``inst`` are written to ``support_path`` and
    ``weight_path``. A ``ModelException`` raised along the way is logged
    as an error and not propagated.
    """
    try:
        inst.renormalize(support)
        wmi_support = inst.tree_to_WMI_support()
        wmi_weight = inst.tree_to_WMI_weightfun()

        logger.debug("Writing result to files:\n{}\n{}".format(
            support_path, weight_path))
        write_smtlib(wmi_support, support_path)
        write_smtlib(wmi_weight, weight_path)
        logger.debug("Done.")
    except ModelException as err:
        logger.error("Couldn't renormalize the DET: {}".format(err))
def generate_ppc_from_formula(path_condition):
    """Collect SMT-LIB scripts for the prefixes of a conjunctive path condition.

    The condition is peeled from the outside in: while it is a conjunction,
    its second argument is inspected and its first argument becomes the
    next condition. Conjuncts whose serialisation mentions ``!rvalue!`` or
    ``!obs!`` are skipped; for every other one the *current* condition is
    serialised and recorded.

    Returns:
        A list of ``("-no-info-", script_text)`` tuples.
    """
    # NOTE(review): fixed, shared scratch path; concurrent runs would
    # overwrite each other's file.
    script_path = "/tmp/z3_script_ppc"
    collected = []

    while path_condition.is_and():
        serialized = str(path_condition.arg(1).serialize())
        skip = "!rvalue!" in serialized or "!obs!" in serialized
        if not skip:
            # Round-trip through a file to obtain the SMT-LIB text.
            write_smtlib(path_condition, script_path)
            with open(script_path, "r") as handle:
                script = handle.read()
            collected.append(("-no-info-", script))
        path_condition = path_condition.arg(0)

    return collected
def generate_extended_patch_formula(patch_formula, path_condition):
    """Conjoin ``patch_formula`` with renamed copies of itself.

    The number of copies is derived from ``path_condition``: the matches
    of ``angelic!(.+?)!0`` in its serialisation are counted and divided by
    four (NOTE(review): presumably each angelic value is referenced four
    times -- confirm). For every index ``1 .. count - 1`` the SMT-LIB text
    of the patch formula is copied with each declared symbol whose name
    does not contain ``const`` suffixed by ``_<index>``.

    Args:
        patch_formula: formula to extend; serialised with ``write_smtlib``.
        path_condition: object whose ``serialize()`` text is scanned.

    Returns:
        ``patch_formula`` itself when the derived count is at most one,
        otherwise the conjunction of ``patch_formula`` with all copies.
    """
    angelic_count = len(
        re.findall("angelic!(.+?)!0", str(path_condition.serialize()))) // 4
    if angelic_count <= 1:
        return patch_formula

    # NOTE(review): fixed, shared scratch path; concurrent runs would
    # overwrite each other's file.
    path_script = "/tmp/z3_script_patch"
    write_smtlib(patch_formula, path_script)
    with open(path_script, "r") as script_file:
        script = script_file.read()

    # Raw string: "\(" is not a valid escape in a normal string literal.
    var_list = set(re.findall(r"\(declare-fun (.+?) \(\)", script))
    input_list = [var for var in var_list if "const" not in var]

    formula_list = []
    for index in range(1, angelic_count):
        postfix = "_" + str(index)
        substituted_formula_txt = script
        for input_var in input_list:
            if "|" in input_var:
                # Quoted symbol: keep the closing bar at the very end.
                input_var_postfix = input_var[:-1] + postfix + "|"
            else:
                input_var_postfix = input_var + postfix
            # NOTE(review): plain substring replacement; symbol names that
            # are substrings of one another influence each other's result.
            substituted_formula_txt = substituted_formula_txt.replace(
                input_var, input_var_postfix)
        formula_list.append(generate_formula(substituted_formula_txt))

    constraint_formula = patch_formula
    for formula in formula_list:
        constraint_formula = And(constraint_formula, formula)
    return constraint_formula
def generate_model_cli(formula):
    """
        Solve the formula through the Z3 command line and return the parsed
        model.

        The formula is written as an SMT-LIB script, ``(get-model)`` and
        ``(exit)`` are appended, and ``z3`` is run on the script with its
        output redirected to a result file. The lines of that file are
        handed to ``parser.parse_z3_output``.

        Arguments:
            formula: formula to solve (serialised with ``write_smtlib``)
    """
    emitter.normal("\textracting z3 model")

    # NOTE(review): fixed, shared scratch paths; concurrent runs would
    # overwrite each other's files.
    script_path = "/tmp/z3_script_model_cli"
    output_path = "/tmp/z3_output_model_cli"

    write_smtlib(formula, script_path)
    with open(script_path, "a") as script:
        script.write("(get-model)\n")
        script.write("(exit)\n")

    utilities.execute_command("z3 {} > {}".format(script_path, output_path))

    with open(output_path, "r") as result:
        output_lines = result.readlines()
    return parser.parse_z3_output(output_lines)
def generate_ppc_from_formula(path_condition):
    """Collect SMT-LIB scripts for prefixes of a path condition, with a cap.

    While the condition is a conjunction, its second argument is inspected
    and its first argument becomes the next condition. Conjuncts whose
    serialisation mentions ``!rvalue!`` or ``!obs!`` are skipped; for every
    other one the current condition is serialised and recorded. Collection
    stops with a warning once more than
    ``2 * values.DEFAULT_MAX_FLIPPINGS`` entries have been recorded.

    Returns:
        A list of ``("-no-info-", script_text)`` tuples.
    """
    branches = []
    emitter.normal("\textracting branches from path condition")
    branch_cap = 2 * values.DEFAULT_MAX_FLIPPINGS
    # NOTE(review): fixed, shared scratch path; concurrent runs would
    # overwrite each other's file.
    script_path = "/tmp/z3_script_ppc"

    while path_condition.is_and():
        text = str(path_condition.arg(1).serialize())
        relevant = "!rvalue!" not in text and "!obs!" not in text
        if relevant:
            # Round-trip through a file to obtain the SMT-LIB text.
            write_smtlib(path_condition, script_path)
            with open(script_path, "r") as handle:
                branches.append(("-no-info-", handle.read()))
        path_condition = path_condition.arg(0)
        # The cap is only checked right after an entry was recorded.
        if relevant and len(branches) > branch_cap:
            emitter.warning("\t[warning] maximum cap reach for branching")
            break

    return branches
def generate_tree(n_reals, n_bools, depth, n_formulas, seed, output_path):
    """Generate ``n_formulas`` problem instances and pickle their file names.

    Each instance consists of a support, a weight function and a query,
    written as SMT-LIB to ``<output_path>_<k>.support``, ``.weights`` and
    ``.query``. Candidates for which constructing ``WMIInference`` raises
    ``WMIRuntimeException`` are discarded and regenerated.

    Args:
        n_reals, n_bools, seed: forwarded to ``ModelGenerator``.
        depth: depth passed to the support/weight/query generators.
        n_formulas: number of instances to produce.
        output_path: prefix of the instance files and path of the pickled
            list of ``(support_name, weights_name, query_name)`` tuples.
    """
    gen = ModelGenerator(n_reals, n_bools, seed)
    problem_instances = []

    while len(problem_instances) < n_formulas:
        support = gen.generate_support_tree(depth)
        weights = gen.generate_weights_tree(depth)
        query = Synthetic._random_query(gen, depth)

        try:
            # Constructed only for the consistency check; the result is unused.
            _ = WMIInference(support, weights, check_consistency=True)

            instance_name = output_path + "_" + str(len(problem_instances))
            support_name = instance_name + ".support"
            weights_name = instance_name + ".weights"
            query_name = instance_name + ".query"

            write_smtlib(support, support_name)
            write_smtlib(weights, weights_name)
            write_smtlib(query, query_name)

            problem_instances.append((support_name, weights_name, query_name))
        except WMIRuntimeException:
            continue

    # pickle emits bytes, so the file must be opened in binary mode; the
    # context manager closes it even if dumping fails.
    with open(output_path, 'wb') as output_file:
        pickle.dump(problem_instances, output_file)
def test_pa_iff_real():
    """Check that three equivalent encodings of "f1 xor f2" have zero volume.

    ``f2`` is the normalized form of ``f1``. The test is currently skipped
    unconditionally by its first statement.
    """
    pytest.skip("Bug fix requires changing PA solver")

    def pa_volume(formula):
        # Volume of ``formula`` under a constant weight of 1.
        return PredicateAbstractionEngine(
            domain, formula, smt.Real(1.0)).compute_volume()

    domain = Domain.make([], ["x", "y"], real_bounds=(-1, 1))
    x, y = domain.get_symbols()
    scale = 0.00000001
    f1 = (x * scale >= 0) & (x * scale <= y * scale) & (y * scale < scale)
    f2 = normalize_formula(f1)
    print(smt_to_nested(f2))

    pa_vol1 = pa_volume(domain.get_bounds() & (f1 | f2) & (~f1 | ~f2))
    # Dump the first problem so it can be inspected outside the test.
    smt.write_smtlib(domain.get_bounds() & (f1 | f2) & (~f1 | ~f2),
                     "test_pa_iff_real.support")
    smt.write_smtlib(smt.Real(1.0), "test_pa_iff_real.weight")
    pa_vol2 = pa_volume(smt.Iff(f1, ~f2))
    pa_vol3 = pa_volume(~smt.Iff(f1, f2))

    assert pa_vol1 == pytest.approx(0, REL_ERROR**3)
    assert pa_vol2 == pytest.approx(0, REL_ERROR**3)
    assert pa_vol3 == pytest.approx(0, REL_ERROR**3)
def dump(self, model_path):
    """Write the model as an index file plus support and weight files.

    The support and weight function are written as SMT-LIB into the folder
    containing ``model_path``. ``model_path`` receives a pickled dict with
    their relative paths, the list of ``(name, type)`` variable pairs, the
    bounds and, when set, the metadata.

    Args:
        model_path: location of the index file. Existing files (index or
            companions) only trigger a warning and are overwritten.
    """
    if exists(model_path):
        logger.warning("File exists: {}".format(model_path))

    name = basename(model_path)
    support_filename = Model.SUPPORT_TEMPL.format(name)
    weightf_filename = Model.WEIGHTF_TEMPL.format(name)

    target_dir = abspath(dirname(model_path))
    support_path = join(target_dir, support_filename)
    weightf_path = join(target_dir, weightf_filename)

    companions = [support_path, weightf_path]
    if any(exists(path) for path in companions):
        logger.warning("File(s) exist:\n" + "\n".join(companions))

    write_smtlib(self.support, support_path)
    write_smtlib(self.weightfun, weightf_path)

    variables = [(var.symbol_name(), var.symbol_type())
                 for var in self.get_vars()]

    index = {}
    index['support_path'] = relpath(support_path, target_dir)
    index['weightf_path'] = relpath(weightf_path, target_dir)
    index['variables'] = variables
    index['bounds'] = self.bounds
    if self.metadata is not None:
        index['metadata'] = self.metadata

    with open(model_path, 'wb') as index_file:
        pickle.dump(index, index_file)
def to_file(self, filename: str):
    """Write the domain, support, weight and queries into a directory.

    Args:
        filename: directory that receives the files; created (including
            parents) when it does not exist yet.

    The file names inside the directory come from ``get_domain_file()``,
    ``get_support_file()``, ``get_weight_file()`` and ``get_query_file(i)``.
    The weight is skipped when falsy, and a missing ``queries`` attribute
    value (``None``) is treated as no queries.
    """
    def wrap(f):
        return os.path.join(filename, f)

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(filename, exist_ok=True)

    self.domain.to_file(wrap(self.get_domain_file()))
    smt.write_smtlib(self.support, wrap(self.get_support_file()))
    if self.weight:
        smt.write_smtlib(self.weight, wrap(self.get_weight_file()))
    for i, query in enumerate(self.queries or []):
        smt.write_smtlib(query, wrap(self.get_query_file(i)))
import sys
import pysmt
from pysmt.rewritings import PrenexNormalizer, Ackermannizer
from pysmt.smtlib.script import SmtLibScript
from pysmt.smtlib.parser import SmtLibParser
from pysmt.shortcuts import to_smtlib, write_smtlib
from six.moves import cStringIO

# Script: parse an SMT-LIB file, apply the Ackermannizer rewriting to its
# last formula and write the result next to the input.
INPUT_PATH = "/home/yoniz/git/hermes/dispatcher/dispatcher/examples/Assessment2/nodtbbg.smt2"
OUTPUT_PATH = "/home/yoniz/git/hermes/dispatcher/dispatcher/examples/Assessment2/nodtbbg_ack.smt2"

parser = SmtLibParser()

with open(INPUT_PATH, 'r') as f:
    smtlib_str = f.read()

# The parser consumes a stream, so wrap the text read above.
stream = cStringIO(smtlib_str)
script = parser.get_script(stream)
formula = script.get_last_formula()

ackermanization = Ackermannizer()
ackermized_formula = ackermanization.do_ackermannization(formula)

write_smtlib(ackermized_formula, OUTPUT_PATH)
def run_cegis(program_path, project_path, patch_list):
    """Refine the patch pool with a counter-example guided loop.

    Steps, as visible in this function:

    1. Run concolic exploration to obtain an assertion and the largest
       path condition. Without an assertion the first patch of
       ``patch_list`` is emitted as final and the function returns.
    2. Build the complete specification as
       ``And(Not(assertion), program_specification)``.
    3. Until the time budget is reported as used up: take the next patch
       from the generator and check whether it can violate the
       specification. If so, generate a new input, execute it concretely,
       record the resulting assertion as a counter-example and discard
       the patch; otherwise write the final check to disk and stop.

    Timing and patch-count statistics are stored on ``values``.

    Args:
        program_path: path of the program; ``".bc"`` is appended for the
            concrete execution and its directory is used for the
            program specification.
        project_path: forwarded to ``generator.generate_patch``.
        patch_list: initial patches; forwarded to the concolic exploration.
    """
    test_output_list = values.LIST_TEST_OUTPUT
    test_template = reader.collect_specification(test_output_list[0])
    # Directory part of program_path (everything before the last "/").
    binary_dir_path = "/".join(program_path.split("/")[:-1])
    time_check = time.time()
    assertion, largest_path_condition = concolic.run_concolic_exploration(
        program_path, patch_list)
    # Durations are recorded in minutes.
    duration = (time.time() - time_check) / 60
    values.TIME_TO_EXPLORE = duration
    emitter.normal("\tcombining explored program paths")
    if not assertion:
        patch = patch_list[0]
        emitter.emit_patch(patch, message="\tfinal patch: ")
        return
    program_specification = generator.generate_program_specification(
        binary_dir_path)
    complete_specification = And(Not(assertion), program_specification)
    emitter.normal("\tcomputed the program specification formula")
    emitter.sub_title("Evaluating Patch Pool")
    iteration = 0
    output_dir = definitions.DIRECTORY_OUTPUT
    counter_example_list = []
    time_check = time.time()
    values.CONF_TIME_CHECK = None
    satisfied = utilities.check_budget(values.DEFAULT_TIMEOUT_CEGIS_REFINE)
    # The generator receives the same list object that the loop below
    # appends counter-examples to.
    patch_generator = generator.generate_patch(project_path,
                                               counter_example_list)
    count_throw = 0
    while not satisfied:
        iteration = iteration + 1
        values.ITERATION_NO = iteration
        emitter.sub_sub_title("Iteration: " + str(iteration))
        patch = next(patch_generator, None)
        if not patch:
            # NOTE(review): execution continues with patch == None unless
            # emitter.error terminates the program -- confirm.
            emitter.error("[error] cannot generate a patch")
        patch_formula = app.generator.generate_formula_from_patch(patch)
        emitter.emit_patch(patch, message="\tgenerated patch: ")
        patch_formula_extended = generator.generate_extended_patch_formula(
            patch_formula, largest_path_condition)
        violation_check = And(complete_specification, patch_formula_extended)
        if is_sat(violation_check):
            # The patch can violate the specification: derive a concrete
            # counter-example input and record it.
            model = generator.generate_model(violation_check)
            # print(model)
            arg_list = values.ARGUMENT_LIST
            poc_path = values.CONF_PATH_POC
            values.FILE_POC_GEN = definitions.DIRECTORY_OUTPUT + "/violation-" + str(
                values.ITERATION_NO)
            gen_path = values.FILE_POC_GEN
            input_arg_list, input_var_list = generator.generate_new_input(
                violation_check, arg_list, poc_path, gen_path)
            klee_out_dir = output_dir + "/klee-output-" + str(iteration)
            klee_test_file = output_dir + "/klee-test-" + str(iteration)
            exit_code = concolic.run_concrete_execution(
                program_path + ".bc", input_arg_list, True, klee_out_dir)
            # assert exit_code == 0
            emitter.normal("\t\tgenerating new assertion")
            test_assertion, count_obs = generator.generate_assertion(
                test_template, klee_out_dir)
            write_smtlib(test_assertion, klee_test_file)
            counter_example_list.append((klee_test_file, klee_out_dir))
            emitter.highlight("\t\tnew counter-example added")
            patch = None
            emitter.highlight("\t\tremoving current patch")
            count_throw = count_throw + 1
        else:
            # No violation found for this patch: keep it and stop refining.
            klee_test_file = output_dir + "/klee-test-FINAL"
            # print(to_smtlib(violation_check, False))
            write_smtlib(violation_check, klee_test_file)
            break
        satisfied = utilities.check_budget(values.DEFAULT_TIMEOUT_CEGIS_REFINE)
        if satisfied:
            emitter.warning("\t[warning] ending due to timeout of " +
                            str(values.DEFAULT_TIMEOUT_CEGIS_REFINE) +
                            " minutes")
    duration = (time.time() - time_check) / 60
    values.TIME_TO_REDUCE = duration
    # patch_list = [patch]
    # definitions.FILE_PATCH_SET = definitions.DIRECTORY_OUTPUT + "/patch-set-cegis"
    # writer.write_patch_set(patch_list, definitions.FILE_PATCH_SET)
    # patch = next(patch_generator, None)
    # while patch is not None:
    # patch_formula = app.generator.generate_formula_from_patch(patch)
    # patch_formula_extended = generator.generate_extended_patch_formula(patch_formula, largest_path_condition)
    # violation_check = And(complete_specification, patch_formula_extended)
    # if is_unsat(violation_check):
    # count_final = count_final + 1
    # patch = next(patch_generator, None)
    # NOTE(review): patch is None here when the last iteration discarded
    # it before the budget ran out.
    emitter.emit_patch(patch, message="\tfinal patch: ")
    values.COUNT_PATCH_END = values.COUNT_PATCH_START - count_throw
def analyze_rv32_interpreter(program: List[Instruction],
                             bbs: List[BasicBlock]):
    """Symbolically execute the interpreter on a symbolic RV32I ADD.

    A 32-bit ADD instruction with symbolic ``rs1``/``rs2``/``rd`` fields is
    placed in memory at address 0, the program is executed symbolically
    from PC 0x56, and for every resulting path the memory/register
    relation is checked against ``sym_exec_rsicv_add``. The negated
    verification condition of each path is also written to
    ``path_<ii>.smt2``; on the first path that is not valid a model is
    printed and checking stops.

    Args:
        program: instructions handed to the symbolic execution engine.
        bbs: basic blocks, only used to render ``cfg.pdf``.
    """
    #print("analyzing rv32 interpreter ...")
    mk_dot(dot_cfg(bbs), filename="cfg.pdf")
    #for bb in program: print(bb)
    # start at MainStart @ 0x0056
    start_pc = 0x56
    # symbolic instruction: ADD rs2, rs1, rd
    funct7 = BitVecVal(0, 7)
    rs2 = Symbol("RV32I_ADD_rs2", BVType(5))
    rs1 = Symbol("RV32I_ADD_rs1", BVType(5))
    funct3 = BitVecVal(0b00, 3)  # ADD
    rd = Symbol("RV32I_ADD_rd", BVType(5))
    opcode = BitVecVal(0b0110011, 7)  # OP
    #RV32I_instr = Symbol("RV32IInstruction", BVType(32))
    RV32I_instr = cat(funct7, rs2, rs1, funct3, rd, opcode)
    print(f"Symbolically executing: {RV32I_instr}")
    # interpreter
    orig_state = MachineState().update(PC=BitVecVal(start_pc, 16))

    def place_instr(loc, instr, st) -> MachineState:
        # Stores the instruction address in registers 10/11 and the four
        # instruction bytes in memory starting at loc.
        # make sure PC fits into two registers
        assert loc & 0xffff == loc
        msb, lsb = BitVecVal(loc >> 8, 8), BitVecVal(loc & 0xff, 8)
        st = st.update(R=st.R.update(10, lsb).update(11, msb))
        # Split the 32-bit instruction into bytes, lowest bits first.
        instr_parts = [
            BVExtract(instr, *jj)
            for jj in ((jj * 8, jj * 8 + 7) for jj in range(4))
        ]
        if isinstance(loc, int):
            instr_locs = [loc + ii for ii in range(4)]
        else:
            assert False, "TODO: support symbolic address"
        mem = st.MEM
        for loc, val in zip(instr_locs, instr_parts):
            mem = mem.update(loc, val)
        return st.update(MEM=mem)

    orig_state = place_instr(loc=0, instr=RV32I_instr, st=orig_state)
    mf8_ex = SymExec()
    ex = SymbolicExecutionEngine(program=program,
                                 start_state=orig_state,
                                 semantics=mf8_ex)
    print()
    print()
    print("SYM EXEC")
    print("--------")
    done, end_state = ex.run(max_steps=2000)
    #ex.print_state()
    #ex.print_mem(ex.st)
    #ex.print_path()
    print(ex.taken)
    print(f"DONE? {done}")
    #print("PATHS:")
    for ii, (cond, st) in enumerate(end_state):
        print(str(ii) + ") " + cond.serialize())
        #ex.print_mem(st)
    solver = Solver(name="z3", logic=QF_AUFBV)
    # check for completeness
    # (the path conditions are complete when their disjunction cannot be
    # falsified)
    conds = reduce(Or, (cond for cond, st in end_state))
    complete = not solver.is_sat(Not(conds))
    print(f"Complete? {complete}")

    # check result of every path:
    def to_mem_addrs(reg_index):
        # Four byte addresses for a register, highest address first.
        return reversed([0xf100 + reg_index * 8 + jj for jj in range(4)])

    def relate_regs(mem, regs):
        # Constraint: each of the 32 entries of regs equals the
        # concatenation of its four bytes in mem.
        def relate_loc(ii):
            mem_locs = [
                Select(mem, BitVecVal(addr, 16)) for addr in to_mem_addrs(ii)
            ]
            return Equals(cat(*mem_locs), Select(regs, BitVecVal(ii, 5)))

        return reduce(And, [relate_loc(ii) for ii in range(32)])

    def name_value(solver, name, val):
        # Bind val to a fresh named symbol so it can be found in a model.
        sym = Symbol(name, val.get_type())
        solver.add_assertion(Equals(sym, val))

    def locs_to_str(name, array, locs):
        return "; ".join(f"{name}[{ii:04x}] = 0x{array[ii]:02x}"
                         for ii in sorted(list(set(locs))))

    for ii, (cond, end_st) in enumerate(end_state):
        # create clean slate solver
        solver = Solver(name="cvc4", logic=QF_AUFBV, generate_models=True)
        # symbolically execute the RISC-V add
        regs = Symbol('RV32I_REGS', ArrayType(BVType(5), BVType(32)))
        regs_n = sym_exec_rsicv_add(rs1=rs1, rs2=rs2, rd=rd, regs=regs)
        name_value(solver, "DBG_RV32I_REGS_N", regs_n)
        # add mem to regs relation
        mem_orig = orig_state.MEM.array()
        # Precondition: path condition, initial memory mirrors regs, and
        # register 0 holds zero.
        pre = And(And(cond, relate_regs(mem_orig, regs)),
                  Equals(Select(regs, BitVecVal(0, 5)), BitVecVal(0, 32)))
        mem_n = end_st.MEM.array()
        post = relate_regs(mem_n, regs_n)
        # DEBUG: add symbols for every memory write
        mem_data = end_st._mem._data
        mem_write_locs = [
            Symbol(f"DBG_MF8_MEM_WRITE_LOC_{ii}", BVType(16))
            for ii in range(len(mem_data))
        ]
        for sym, (expr, _) in zip(mem_write_locs, mem_data):
            solver.add_assertion(Equals(sym, expr))
        # now check for validity
        formula = Implies(pre, post)
        # Dump the negated formula so the path can be re-checked offline.
        write_smtlib(Not(formula), f"path_{ii:02}.smt2")
        correct = solver.is_valid(formula)
        print(f"Correct? {correct}")
        if not correct:
            print("Path condition:")
            print(cond.serialize())
            print("Symbolic Mem:")
            ex.print_mem(end_st)
            print("Model:")
            rs1_val = solver.get_value(rs1).bv_unsigned_value()
            rs2_val = solver.get_value(rs2).bv_unsigned_value()
            rd_val = solver.get_value(rd).bv_unsigned_value()
            regs_val = ArrayValue(solver.get_value(regs))
            regs_n_val = ArrayValue(solver.get_value(regs_n))
            mem_val = ArrayValue(solver.get_value(mem_orig))
            mem_n_val = ArrayValue(solver.get_value(mem_n))
            reg_addrs = [rd_val, rs1_val, rs2_val]
            mem_write_locs_vals = [
                solver.get_value(ll).bv_unsigned_value()
                for ll in mem_write_locs
            ]
            # Memory addresses of the involved registers plus every
            # address that was written.
            mem_addrs = reduce(operator.add,
                               [list(to_mem_addrs(ii))
                                for ii in reg_addrs]) + mem_write_locs_vals
            print(f"R[{rd_val}] <- R[{rs1_val}] + R[{rs2_val}]")
            print(f"Pre: {locs_to_str('R', regs_val, reg_addrs)}")
            print(f" {locs_to_str('M', mem_val, mem_addrs)}")
            print(f"Post: {locs_to_str('R', regs_n_val, reg_addrs)}")
            print(f" {locs_to_str('M', mem_n_val, mem_addrs)}")
            print(
                f"MEM write addresses: {[f'0x{loc:04x}' for loc in mem_write_locs_vals]}"
            )
            #print(regs_n_val)
            #print(mem_val)
            # TODO: check PC post-condition
            # TODO: add pre and post conditions for program memory equivalence
            # TODO: add pre and post conditions for data memory equivalence
            break
    return
def generate_model(formula):
    """
        Solve the formula with PySMT and return the model as byte lists.

        Arguments:
            formula: formula to solve

        Returns:
            ``None`` when ``get_model`` finds no model. Otherwise a dict
            mapping each declared symbol that occurs in the model to a
            dict ``{index: value}``. Symbols whose name contains
            ``const_`` but not ``const_arr`` are read as 32-bit values
            stored at index 0; all others are read as arrays from 32-bit
            indices to bytes.
    """
    emitter.debug("extracting z3 model")
    model = get_model(formula)
    if model is None:
        return None

    # The declared symbols are recovered from the SMT-LIB text.
    # NOTE(review): fixed, shared scratch path; concurrent runs would
    # overwrite each other's file.
    path_script = "/tmp/z3_script_model"
    write_smtlib(formula, path_script)
    with open(path_script, "r") as script_file:
        script = script_file.read()

    # Raw string: "\(" is not a valid escape in a normal string literal.
    var_list = set(re.findall(r"\(declare-fun (.+?) \(\)", script))

    sym_var_list = dict()
    for var_name in var_list:
        if "const_" in var_name and "const_arr" not in var_name:
            sym_def = Symbol(var_name, BV32)
            if sym_def not in model:
                continue
            x = model[sym_def]
            byte_list = dict()
            default_value = x.bv_signed_value()
            byte_list[0] = default_value
        else:
            sym_def = Symbol(var_name, ArrayType(BV32, BV8))
            if sym_def not in model:
                continue
            x = model[sym_def].simplify()
            byte_list = dict()
            value_array_map = x.array_value_assigned_values_map()
            # Values print as "<number>_<width>"; keep the number part.
            default_value = int(str(x.array_value_default()).split("_")[0])
            if not value_array_map:
                byte_list[0] = default_value
            else:
                for idx, val in value_array_map.items():
                    index = int(str(idx).split("_")[0])
                    value = int(str(val).split("_")[0])
                    byte_list[index] = value

                max_index = max(byte_list)
                if var_name in values.LIST_BIT_LENGTH:
                    array_size = values.LIST_BIT_LENGTH[var_name] - 1
                    if var_name in ["A-data"]:
                        array_size = max_index
                else:
                    # TODO: this could be wrong calculation
                    array_size = max_index + 1

                if max_index == 0:
                    array_size = 2

                # Fill unassigned positions with the array default ...
                if var_name not in ["A-data"]:
                    for i in range(0, array_size):
                        if i not in byte_list:
                            byte_list[i] = default_value

                # ... then drop the trailing run of zero bytes.
                if var_name not in ["A-data", "A-data-stat"]:
                    for i in range(array_size - 1, -1, -1):
                        if byte_list[i] == 0:
                            byte_list.pop(i)
                        else:
                            break
        sym_var_list[var_name] = byte_list

    emitter.data("model var list", sym_var_list)
    return sym_var_list