def evaluate(self, filename, proof_name=None):
    if self.model is not None:
        self.model.eval()
    # Resolve which ATP backend the hammer should use, if any.
    if "hammer" in self.opts.method:
        for atp in ["Vampire", "Z3", "CVC4", "Eprover"]:
            if ("hammer_" + atp) in self.opts.method:
                with_hammer = atp
                self.opts.method = self.opts.method.replace("hammer_" + atp, "hammer")
                break
        else:
            with_hammer = "All"
    else:
        with_hammer = None
    assert "hammer_" not in self.opts.method
    hammer_timeout = (
        self.opts.hammer_timeout if "ours" in self.opts.method else self.opts.timeout
    )
    with FileEnv(
        filename,
        self.opts.max_num_tactics,
        self.opts.timeout,
        with_hammer=with_hammer,
        hammer_timeout=hammer_timeout,
    ) as file_env:
        results = []
        pprint("number of proofs in " + filename + " is " + str(len(file_env.proofs)))
        for proof_env in file_env:  # start a proof
            if proof_name is not None and proof_env.proof["name"] != proof_name:
                continue
            print("proof: ", proof_env.proof["name"])
            # print('cuda memory allocated before proof: ', torch.cuda.memory_allocated(self.opts.device), file=sys.stderr)
            success, proof_pred, time, num_tactics = self.prove(proof_env)
            results.append(
                {
                    "filename": filename,
                    "proof_name": proof_env.proof["name"],
                    "success": success,
                    "proof_gt": [
                        step["command"][0]
                        for step in proof_env.proof["steps"]
                        if step["command"][1] != "VernacEndProof"
                    ],
                    "proof_pred": proof_pred,
                    "time": time,
                    "num_tactics": num_tactics,
                }
            )
            if proof_name is not None:
                break
    return results
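# Usage sketch for `evaluate` (illustrative: assumes an `agent` instance of
# this class and a dataset path in CoqGym's layout; the file and proof names
# below are hypothetical):
#
#     results = agent.evaluate("../data/StructTact/Assoc.json",
#                              proof_name="get_set_same")
#     for r in results:
#         print(r["proof_name"], r["success"], r["num_tactics"])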
def gloop_evaluate(self, filename, proof_name=None):
    if self.model is not None:
        self.model.eval()
    # Resolve which ATP backend the hammer should use, if any.
    if 'hammer' in self.opts.method:
        for atp in ['Vampire', 'Z3', 'CVC4', 'Eprover']:
            if ('hammer_' + atp) in self.opts.method:
                with_hammer = atp
                self.opts.method = self.opts.method.replace('hammer_' + atp, 'hammer')
                break
        else:
            with_hammer = 'All'
    else:
        with_hammer = None
    assert 'hammer_' not in self.opts.method
    hammer_timeout = (
        self.opts.hammer_timeout if 'ours' in self.opts.method else self.opts.timeout
    )
    with FileEnv(
        filename,
        self.opts.max_num_tactics,
        self.opts.timeout,
        with_hammer=with_hammer,
        hammer_timeout=hammer_timeout,
    ) as file_env:
        results = []
        # Combine constants, inductives, and foreground goals into one environment
        proof_env = file_env.coagulated_env()
        if proof_name is not None and proof_env.proof['name'] != proof_name:
            return results
        print('proof: ', proof_env.proof['name'])
        # print('cuda memory allocated before proof: ', torch.cuda.memory_allocated(self.opts.device), file=sys.stderr)
        success, proof_pred, time, num_tactics = self.prove(proof_env)
        # Append a separate proof per goal in the coagulated environment
        results.append({
            'filename': filename,
            'proof_name': proof_env.proof['name'],
            'success': success,
            'proof_gt': [
                step['command'][0]
                for step in proof_env.proof['steps']
                if step['command'][1] != 'VernacEndProof'
            ],
            'proof_pred': proof_pred,
            'time': time,
            'num_tactics': num_tactics,
        })
    return results
def evaluate_similar(self, filename, target_goals, learn_from):
    if self.model is not None:
        self.model.eval()
    with FileEnv(
        filename,
        self.opts.max_num_tactics,
        self.opts.timeout,
        with_hammer=None,
        # Hammers are disabled here, so the generic timeout is used.
        hammer_timeout=self.opts.timeout,
    ) as file_env:
        results = []
        for proof_env in file_env:  # start a proof
            if learn_from.count(proof_env.proof["name"]) > 0:
                # Record a trajectory to learn from (placeholder branch).
                print("record")
                return
            elif target_goals[0] == proof_env.proof["name"]:
                # Act on the matching target goal (placeholder branch).
                print("run similar")
                return
            target_goals.pop(0)
            learn_from.pop(0)
            print("proof: ", proof_env.proof["name"])
            # print('cuda memory allocated before proof: ', torch.cuda.memory_allocated(self.opts.device), file=sys.stderr)
            success, proof_pred, time, num_tactics = self.prove(proof_env)
            results.append(
                {
                    "filename": filename,
                    "proof_name": proof_env.proof["name"],
                    "success": success,
                    "proof_gt": [
                        step["command"][0]
                        for step in proof_env.proof["steps"]
                        if step["command"][1] != "VernacEndProof"
                    ],
                    "proof_pred": proof_pred,
                    "time": time,
                    "num_tactics": num_tactics,
                }
            )
    return results
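# Sketch of the intended call pattern for `evaluate_similar` (illustrative:
# the path and proof names are hypothetical; `target_goals` and `learn_from`
# are parallel lists of proof names):
#
#     agent.evaluate_similar("../data/StructTact/Assoc.json",
#                            target_goals=["get_set_other"],
#                            learn_from=["get_set_same"])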
def async_trajectories(self, *args):
    """
    Collects an epoch's worth of data (epoch = one rollout per proof_env).

    Waits until the `done` event passed in as an arg is set(); otherwise data
    cannot be retrieved from the IPC file descriptor, possibly due to
    refcount deletion.
    """
    pid, queue, done = args[0], args[1], args[2]
    print("{}: started collection".format(pid))
    for fenvargs in self.file_env_args:
        with FileEnv(*fenvargs) as fenv:
            for proof_env in fenv:
                self.agent.optimizer.zero_grad()
                # Collect data we can backprop through
                data = self.agent.sample_once(proof_env, self.tac_template, train=True)
                trajectory, results, exp = data['samples'], data['results'], data['exp']
                collected = len(trajectory)
                fg_goals, bg_goals, shelved, given_up = proof_env.serapi.query_goals()
                len_fg_bg = (len(fg_goals), len(bg_goals))
                # Backpropagate the REINFORCE loss: -log_prob * reward per step
                losses = torch.cat([
                    (prob * -r).unsqueeze(0) for prob, r in trajectory
                ]).to(trajectory[0][0].device)
                loss = torch.mean(losses)
                loss.backward()  # loss.backward(retain_graph=True) is VERY expensive
                model_grads = [p.grad for p in self.agent.model.parameters()]
                grads = {'model': model_grads, 'RND': None}
                if self.agent.opts.RND and exp['exp_avg'] is not None:
                    exp['exp_avg'] = exp['exp_avg'].item()
                    grads['RND'] = exp['grads']
                print("{}: collected {}".format(pid, collected))
                print("{}: results {}".format(pid, results))
                queue.put({
                    'grads': grads,
                    'collected': collected,
                    'results': results,
                    'loss': loss.detach().item(),
                    'len_fg_bg': len_fg_bg,
                    'exp_bonuses': exp,
                    'proof_name': proof_env.name(),
                })
    queue.put(None)  # sentinel: this worker is done producing
    print("{}: finished & waiting".format(pid))
    done.wait()
    del self.agent.model
    torch.cuda.empty_cache()
    sys.exit(0)
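# A minimal driver sketch for `async_trajectories`, assuming
# torch.multiprocessing with the "spawn" start method (illustrative, not the
# repo's actual launcher; `trainer`, `num_workers`, and `apply_grads` are
# hypothetical names):
#
#     import torch.multiprocessing as mp
#
#     ctx = mp.get_context("spawn")
#     queue, done = ctx.Queue(), ctx.Event()
#     workers = [ctx.Process(target=trainer.async_trajectories,
#                            args=(pid, queue, done))
#                for pid in range(num_workers)]
#     for w in workers:
#         w.start()
#     finished = 0
#     while finished < num_workers:
#         msg = queue.get()           # drain BEFORE setting `done`, or the
#         if msg is None:             # feeder thread may drop the payload
#             finished += 1
#             continue
#         apply_grads(msg['grads'])   # hypothetical gradient reducer
#     done.set()                      # now workers may exit
#     for w in workers:
#         w.join()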
def train_RL_DFS(self, logger, n_epoch, file_list, with_hammer, hammer_timeout):
    """
    TODO: move into an `RL_Trainer`-style file?

    Collects samples and updates the model in accordance with DFS sampling.
    """
    print("Making sure save folder exists...")
    os.makedirs(self.true_logdir, exist_ok=True)
    print("+ Good to go +")
    tac_template = self.get_tac_template()
    last_ep = 0
    try:
        for curr_epoch in range(n_epoch):
            start = time.time()
            last_ep = curr_epoch
            print("\n-------------EPOCH-------------")
            print("---------------{}---------------\n".format(curr_epoch))
            losses = []
            expl_bonuses = {}
            results = []
            for filename in file_list:
                with FileEnv(filename, self.opts.max_num_tactics,
                             self.opts.timeout, with_hammer=with_hammer) as file_env:
                    for proof_env in file_env:  # start a proof
                        curr_name = proof_env.proof['name']
                        print('proof: ', curr_name)
                        # TODO: control the number of samples better
                        samples, result, expl_bonus = self.sample_DFS(proof_env, tac_template)
                        # REINFORCE loss: -log_prob * reward per sampled step
                        losses_env = torch.cat([
                            ((-logprob) * reward).unsqueeze(0)
                            for logprob, reward in samples
                        ])
                        losses.append(torch.mean(losses_env))
                        expl_bonus['added'] = 0
                        if self.opts.RND:
                            expl_bonus['added'] = 1
                            del expl_bonus['grads']
                        expl_bonuses[curr_name] = expl_bonus
                        results.append(result)
            loss = sum(losses) / len(losses)
            print("\tLoss: {}".format(loss.item()))
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
            self._log_epoch(logger, curr_epoch, start, results, 1, losses,
                            [1, 1], expl_bonuses)
            # Update the RND predictor on the averaged exploration bonuses
            if self.opts.RND and any(b['exp_avg'] is not None
                                     for b in expl_bonuses.values()):
                self.RND_optimizer.zero_grad()
                bonus_loss = sum(
                    b['exp_avg'] for b in expl_bonuses.values()
                    if b['exp_avg'] is not None
                ) / len(expl_bonuses)
                bonus_loss.backward()
                self.RND_optimizer.step()
    except KeyboardInterrupt:
        print("Caught keyboard interrupt")
        print("Saving model*")
        self.save(last_ep, self.true_logdir)
        print("-Saved model-")
    return results
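# The loss in `train_RL_DFS` (and in `async_trajectories`) is vanilla
# REINFORCE: descend on -log pi(a|s) * R. A self-contained sketch of that
# objective with toy values:
#
#     import torch
#
#     log_probs = torch.tensor([-0.7, -1.2, -0.3], requires_grad=True)  # log pi(a_t | s_t)
#     rewards = torch.tensor([0.0, 0.0, 1.0])                           # per-step returns
#     loss = torch.mean(-log_probs * rewards)                           # same form as `losses_env`
#     loss.backward()  # would flow into the policy parameters in real use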
def __init__(self, filename, proof_name):
    # 500 and 1000 are the hard-coded max_num_tactics and timeout for the FileEnv
    self.file_env = FileEnv(filename, 500, 1000)
    self.proof_env = None
    for proof_env in self.file_env:  # start a proof
        if proof_env.proof['name'] == proof_name:
            self.proof_env = proof_env
            break
if __name__ == "__main__":
    import time
    import statistics
    import pickle

    # Benchmark the RL environment.
    files = [f for f in glob.glob("../data/*/*.json")]
    # Record the time each step in the RL env takes.
    step_time = []
    for f in files:
        # Extract proof names & proof steps (tactics).
        file_env = FileEnv(f, max_num_tactics=100, timeout=600)
        names = [pf['name'] for pf in file_env.proofs]
        tacs = [[step['command'][0] for step in pf['steps']]
                for pf in file_env.proofs]
        for i, name in enumerate(names):
            # Replay the recorded tactics for each proof.
            try:
                env = RLEnv(f, name)
            except serapi.CoqExn:
                print("CoqExn")
                continue
            for tac in tacs[i]:
                start = time.time()
                try:
                    reward = env.step(tac)
                except Exception:
                    # (assumed) skip proofs whose recorded tactics fail to replay
                    break
                step_time.append(time.time() - start)
    # (assumed) summarize and persist the collected timings
    if step_time:
        print("mean step time: {:.4f}s".format(statistics.mean(step_time)))
    with open("step_time.pkl", "wb") as f_out:
        pickle.dump(step_time, f_out)