def save_weight_to_pkl(self):
    """Persist every weight tensor in ``self.w`` to its own pickle file.

    Each tensor is materialized via ``.eval()`` (so an active TF session
    is assumed — confirm against caller) and written to
    ``<weight_dir>/<name>.pkl`` using the project ``save_pkl`` helper.
    """
    # exist_ok=True avoids the check-then-create race of the old
    # os.path.exists(...) + os.makedirs(...) pair.
    os.makedirs(self.model.weight_dir, exist_ok=True)
    # Iterate items() so each tensor is looked up once, not re-indexed.
    for name, tensor in self.w.items():
        save_pkl(tensor.eval(),
                 os.path.join(self.model.weight_dir, "%s.pkl" % name))
def discounted_return(self):
    """Run the configured experiment and report return statistics.

    Dispatches on the model's configuration:
      * ``'ValueIteration'`` solver: builds a solver, runs one round of
        value iteration, and optionally pickles the resulting gamma
        (alpha-vector set) tagged with the planning horizon.
      * otherwise: runs the multi-epoch loop, TF or non-TF variant.

    Afterwards prints summary statistics (mean +- std-err of discounted
    and undiscounted per-step return, mean time per epoch) to the console
    and writes one consolidated line to the logger.
    """
    if self.model.solver == 'ValueIteration':
        solver = self.solver_factory(self)
        # Single invocation of value iteration (second arg presumably an
        # epoch/iteration count — TODO confirm against run_value_iteration).
        self.run_value_iteration(solver, 1)
        if self.model.save:
            # Persist the computed gamma, keyed by planning horizon so
            # different horizons don't overwrite each other.
            save_pkl(solver.gamma,
                     os.path.join(self.model.weight_dir,
                                  'VI_planning_horizon_{}.pkl'.format(self.model.planning_horizon)))
    elif not self.model.use_tf:
        self.multi_epoch()
    else:
        self.multi_epoch_tf()
    # Blank separator line before the summary block.
    print('\n')
    console(2, module, 'epochs: ' + str(self.model.n_epochs))
    console(2, module, 'ave undiscounted return/step: ' +
            str(self.experiment_results.undiscounted_return.mean) +
            ' +- ' + str(self.experiment_results.undiscounted_return.std_err()))
    console(2, module, 'ave discounted return/step: ' +
            str(self.experiment_results.discounted_return.mean) +
            ' +- ' + str(self.experiment_results.discounted_return.std_err()))
    console(2, module, 'ave time/epoch: ' + str(self.experiment_results.time.mean))
    # Single tab-separated log line mirroring the console summary above.
    self.logger.info('env: ' + self.model.env + '\t' +
                     'epochs: ' + str(self.model.n_epochs) + '\t' +
                     'ave undiscounted return: ' +
                     str(self.experiment_results.undiscounted_return.mean) + ' +- ' +
                     str(self.experiment_results.undiscounted_return.std_err()) + '\t' +
                     'ave discounted return: ' +
                     str(self.experiment_results.discounted_return.mean) + ' +- ' +
                     str(self.experiment_results.discounted_return.std_err()) + '\t' +
                     'ave time/epoch: ' + str(self.experiment_results.time.mean))
def save_alpha_vectors(self):
    """Persist the current alpha vectors to a pickle file.

    Fetches the vectors from ``self.alpha_vectors()`` and writes them to
    ``<weight_dir>/linear_alpha_net_vectors.pkl`` via ``save_pkl``.
    """
    # exist_ok=True avoids the check-then-create race of the old
    # os.path.exists(...) + os.makedirs(...) pair (consistent with
    # save_weight_to_pkl).
    os.makedirs(self.model.weight_dir, exist_ok=True)
    av = self.alpha_vectors()
    save_pkl(av,
             os.path.join(self.model.weight_dir, "linear_alpha_net_vectors.pkl"))