def run_iters(self):
    T = self.params['t']
    results = {
        'rewards': [],
        'sup_rewards': [],
        'surr_losses': [],
        'sup_losses': [],
        'sim_errs': []
    }
    trajs = []
    beta = self.params['beta']
    snapshots = []

    for i in range(self.params['iters'][-1]):
        print("\tIteration: " + str(i))

        # Retrain the learner at the scheduled update iterations.
        if i in self.params['update']:
            self.lnr.train(verbose=True)

        if i == 0:
            # First iteration: roll out the supervisor alone and train on its data.
            states, i_actions, _, _ = statistics.collect_traj(
                self.env, self.sup, T, False)
            trajs.append((states, i_actions))
            states, i_actions = utils.filter_data(self.params, states, i_actions)
            self.lnr.add_data(states, i_actions)
            self.lnr.train()
        else:
            # Later iterations: roll out a beta-mixture of supervisor and learner,
            # then relabel the visited states with the supervisor's intended actions.
            states, _, _, _ = statistics.collect_traj_beta(
                self.env, self.sup, self.lnr, T, beta, False)
            i_actions = [self.sup.intended_action(s) for s in states]
            states, i_actions = utils.filter_data(self.params, states, i_actions)
            self.lnr.add_data(states, i_actions)
            # Decay the mixing coefficient.
            beta = beta * beta

        # Snapshot the aggregated dataset at the requested iteration counts.
        if (i + 1) in self.params['iters']:
            snapshots.append((self.lnr.X[:], self.lnr.y[:]))

    # Retrain the learner on each snapshot and evaluate it.
    for j in range(len(snapshots)):
        X, y = snapshots[j]
        self.lnr.X, self.lnr.y = X, y
        self.lnr.train(verbose=True)
        print("\nData from snapshot: " + str(self.params['iters'][j]))
        it_results = self.iteration_evaluation()

        results['sup_rewards'].append(it_results['sup_reward_mean'])
        results['rewards'].append(it_results['reward_mean'])
        results['surr_losses'].append(it_results['surr_loss_mean'])
        results['sup_losses'].append(it_results['sup_loss_mean'])
        results['sim_errs'].append(it_results['sim_err_mean'])

    for key in results.keys():
        results[key] = np.array(results[key])

    return results
def run_iters(self):
    T = self.params['t']
    results = {
        'rewards': [],
        'sup_rewards': [],
        'surr_losses': [],
        'sup_losses': [],
        'sim_errs': []
    }
    trajs = []

    # Build a random positive semi-definite covariance scaled to the requested trace,
    # and wrap the supervisor so its actions are perturbed by Gaussian noise.
    d = self.params['d']
    new_cov = np.random.normal(0, 1, (d, d))
    new_cov = new_cov.T.dot(new_cov)
    new_cov = new_cov / np.trace(new_cov) * self.params['trace']
    self.sup = GaussianSupervisor(self.net_sup, new_cov)

    snapshots = []
    for i in range(self.params['iters'][-1]):
        print("\tIteration: " + str(i))

        # Roll out the noise-injected supervisor and aggregate its demonstrations.
        states, i_actions, _, _ = statistics.collect_traj(
            self.env, self.sup, T, False)
        trajs.append((states, i_actions))
        states, i_actions = utils.filter_data(self.params, states, i_actions)
        self.lnr.add_data(states, i_actions)

        # Snapshot the aggregated dataset at the requested iteration counts.
        if (i + 1) in self.params['iters']:
            snapshots.append((self.lnr.X[:], self.lnr.y[:]))

    # Retrain the learner on each snapshot and evaluate it.
    for j in range(len(snapshots)):
        X, y = snapshots[j]
        self.lnr.X, self.lnr.y = X, y
        self.lnr.train(verbose=True)
        print("\nData from snapshot: " + str(self.params['iters'][j]))
        it_results = self.iteration_evaluation()

        results['sup_rewards'].append(it_results['sup_reward_mean'])
        results['rewards'].append(it_results['reward_mean'])
        results['surr_losses'].append(it_results['surr_loss_mean'])
        results['sup_losses'].append(it_results['sup_loss_mean'])
        results['sim_errs'].append(it_results['sim_err_mean'])

    for key in results.keys():
        results[key] = np.array(results[key])

    return results