def __init__(self, hyperparams):
    """
    Build the algorithm from the ALG_MF defaults overridden by the
    caller-supplied hyperparameters.

    Args:
        hyperparams: Mapping whose entries are applied on top of a deep
            copy of ALG_MF before the base class is initialized.
    """
    # Deep-copy so the module-level defaults are never mutated.
    merged = copy.deepcopy(ALG_MF)
    merged.update(hyperparams)
    Algorithm.__init__(self, merged)

    # NOTE(review): self._hyperparams, self.dO and self.dU are presumably
    # populated by Algorithm.__init__ -- confirm against the base class.
    opt_factory = self._hyperparams['policy_opt']['type']
    opt_settings = self._hyperparams['policy_opt']
    self.policy_opt = opt_factory(opt_settings, self.dO, self.dU)

    self.baseline = LinearFeatureBaseline()
def _advance_iteration_variables(self):
    """
    Advance to the next iteration via the base class, then carry state
    over from 'prev' into the fresh 'cur' entries.

    For each of the self.M entries, the previous KL step value is
    copied into the current trajectory info and the previous policy
    info is deep-copied into the current entry.
    """
    Algorithm._advance_iteration_variables(self)
    for idx in range(self.M):
        current, previous = self.cur[idx], self.prev[idx]
        current.traj_info.last_kl_step = previous.traj_info.last_kl_step
        # Deep copy so 'cur' and 'prev' do not share a pol_info object.
        current.pol_info = copy.deepcopy(previous.pol_info)
def __init__(self, hyperparams):
    """
    Build the algorithm from the ALG_MDGPS defaults overridden by the
    caller-supplied hyperparameters.

    Each of the self.M current entries receives a new PolicyInfo with
    its own policy prior instance, and the policy optimizer is then
    constructed from the 'policy_opt' configuration.

    Args:
        hyperparams: Mapping whose entries are applied on top of a deep
            copy of ALG_MDGPS before the base class is initialized.
    """
    # Deep-copy so the module-level defaults are never mutated.
    settings = copy.deepcopy(ALG_MDGPS)
    settings.update(hyperparams)
    Algorithm.__init__(self, settings)

    # NOTE(review): self._hyperparams, self.M, self.cur, self.dO and
    # self.dU are presumably populated by Algorithm.__init__ -- confirm.
    prior_settings = self._hyperparams['policy_prior']
    for idx in range(self.M):
        self.cur[idx].pol_info = PolicyInfo(self._hyperparams)
        info = self.cur[idx].pol_info
        # A separate prior object is created for every entry.
        info.policy_prior = prior_settings['type'](prior_settings)

    opt_factory = self._hyperparams['policy_opt']['type']
    opt_settings = self._hyperparams['policy_opt']
    self.policy_opt = opt_factory(opt_settings, self.dO, self.dU)