def __init__(
        self,
        env_spec,
        regressor_args=None,
):
    """Value-function baseline backed by a Gaussian MLP regressor.

    :param env_spec: environment spec providing the observation space
    :param regressor_args: optional kwargs forwarded to GaussianMLPRegressor
    """
    Serializable.quick_init(self, locals())
    super(GaussianMLPBaseline, self).__init__(env_spec)
    if regressor_args is None:
        regressor_args = {}
    # Regressor maps flat observations to a scalar value estimate.
    self._regressor = GaussianMLPRegressor(
        input_shape=(env_spec.observation_space.flat_dim,),
        output_dim=1,
        name="vf",
        **regressor_args,
    )
def buildGMLP(nonLin):
    """Construct a fixed-configuration GaussianMLPRegressor.

    :param nonLin: hidden-layer nonlinearity passed to the regressor
    :return: a GaussianMLPRegressor with 1-D input and scalar output
    """
    reg_kwargs = {
        'normalize_inputs': False,
        'normalize_outputs': False,
        'hidden_nonlinearity': nonLin,
        'hidden_sizes': (64, 64, 8),
        # only used if adaptive_std == True
        'std_hidden_sizes': (32, 16, 16),
        'adaptive_std': False,
        'learn_std': False,
    }
    return GaussianMLPRegressor(
        input_shape=(1,),
        output_dim=1,
        name="vf1",
        **reg_kwargs,
    )
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
):
    """Value-function baseline over (possibly stacked) observations.

    :param env_spec: environment spec providing the observation space
    :param subsample_factor: fraction of samples kept when fitting (stored only)
    :param num_seq_inputs: number of stacked observations fed as one input
    :param regressor_args: optional kwargs forwarded to GaussianMLPRegressor
    """
    self._subsample_factor = subsample_factor
    regressor_kwargs = dict() if regressor_args is None else regressor_args
    # Input width grows with the number of sequential observations stacked.
    flat_dim = env_spec.observation_space.flat_dim * num_seq_inputs
    self._regressor = GaussianMLPRegressor(
        input_shape=(flat_dim,),
        output_dim=1,
        name="vf",
        **regressor_kwargs,
    )
def __init__(
        self,
        env_spec,
        subsample_factor=1.,
        num_seq_inputs=1,
        regressor_args=None,
        target_key='returns',
):
    """Baseline regressing a named target (e.g. returns) from observations.

    :param env_spec: environment spec providing the observation space
    :param subsample_factor: present for interface parity (not stored here)
    :param num_seq_inputs: number of stacked observations fed as one input
    :param regressor_args: optional kwargs forwarded to GaussianMLPRegressor
    :param target_key: key of the path quantity to regress; also suffixes
        the regressor's variable-scope name
    """
    Serializable.quick_init(self, locals())
    super(GaussianMLPBaseline, self).__init__(env_spec)
    if regressor_args is None:
        regressor_args = {}
    self._regressor = GaussianMLPRegressor(
        input_shape=(env_spec.observation_space.flat_dim * num_seq_inputs,),
        output_dim=1,
        name='vf_' + target_key,
        **regressor_args,
    )
    self._target_key = target_key
def __init__(
        self,
        env_spec,
        policy,
        recurrent=False,
        predict_all=True,
        obs_regressed='all',
        act_regressed='all',
        use_only_sign=False,
        noisify_traj_coef=0,
        optimizer=None,  # this defaults to LBFGS
        regressor_args=None,  # here goes all args straight to the regressor: hidden_sizes, TR, step_size....
):
    """
    Regressor that predicts the policy's latent variable from (obs, act) pairs.

    The concrete regressor class is chosen from the policy's latent
    distribution (`policy.latent_name`): bernoulli, categorical, or normal,
    each with an MLP or recurrent variant depending on `recurrent`.

    :param predict_all: this is only for the recurrent case, to use all hidden states as predictions
    :param obs_regressed: list of index of the obs variables used to fit the regressor. default string 'all'
    :param act_regressed: list of index of the act variables used to fit the regressor. default string 'all'
    :param use_only_sign: stored only; presumably consumed by fit/predict elsewhere — not used here
    :param noisify_traj_coef: stored only; presumably a noise coefficient used elsewhere — not used here
    :param regressor_args: kwargs forwarded verbatim to the chosen regressor
    """
    self.env_spec = env_spec
    self.policy = policy
    self.latent_dim = policy.latent_dim
    self.recurrent = recurrent
    self.predict_all = predict_all
    self.use_only_sign = use_only_sign
    self.noisify_traj_coef = noisify_traj_coef
    self.regressor_args = regressor_args
    # decide what obs variables will be regressed upon
    if obs_regressed == 'all':
        self.obs_regressed = list(
            range(env_spec.observation_space.flat_dim))
    else:
        self.obs_regressed = obs_regressed
    # decide what action variables will be regressed upon
    if act_regressed == 'all':
        self.act_regressed = list(range(env_spec.action_space.flat_dim))
    else:
        self.act_regressed = act_regressed
    # shape the input dimension of the NN for the above decisions.
    self.obs_act_dim = len(self.obs_regressed) + len(self.act_regressed)

    # ?? NOTE(review): quick_init snapshots locals() mid-constructor for
    # serialization; keep it before the regressor is built so the heavy
    # regressor object is not captured.
    Serializable.quick_init(self, locals())

    if regressor_args is None:
        regressor_args = dict()

    if optimizer == 'first_order':
        self.optimizer = FirstOrderOptimizer(
            max_epochs=10,  # both of these are to match Rocky's 10
            batch_size=128,
        )
    elif optimizer is None:
        # None is passed through: the regressor falls back to its own
        # default optimizer (per the parameter comment above, LBFGS).
        self.optimizer = None
    else:
        raise NotImplementedError

    # Dispatch on the policy's latent distribution family.
    if policy.latent_name == 'bernoulli':
        if self.recurrent:
            self._regressor = BernoulliRecurrentRegressor(
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                predict_all=self.predict_all,
                **regressor_args)
        else:
            self._regressor = BernoulliMLPRegressor(
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                **regressor_args)
    elif policy.latent_name == 'categorical':
        if self.recurrent:
            self._regressor = CategoricalRecurrentRegressor(  # not implemented
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                # predict_all=self.predict_all,
                **regressor_args)
        else:
            self._regressor = CategoricalMLPRegressor(
                input_shape=(self.obs_act_dim, ),
                output_dim=policy.latent_dim,
                optimizer=self.optimizer,
                **regressor_args)
    elif policy.latent_name == 'normal':
        # NOTE(review): no recurrent variant exists for the normal case;
        # `recurrent=True` is silently ignored here — confirm intended.
        self._regressor = GaussianMLPRegressor(
            input_shape=(self.obs_act_dim, ),
            output_dim=policy.latent_dim,
            optimizer=self.optimizer,
            **regressor_args)
    else:
        raise NotImplementedError