from sklearn.ensemble import ExtraTreesRegressor

from ifqi import envs
from ifqi.models.actionregressor import ActionRegressor
from ifqi.models.regressor import Regressor
from ifqi.models.mlp import MLP
from ifqi.models.ensemble import Ensemble

"""
Simple script to quickly run fqi. It solves the Car-on-Hill environment
according to the experiment presented in:

Ernst, Damien, Pierre Geurts, and Louis Wehenkel.
"Tree-based batch mode reinforcement learning."
Journal of Machine Learning Research 6.Apr (2005): 503-556.
"""

mdp = envs.CarOnHill()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
assert reward_dim == 1

regressor_params = {'n_estimators': 50,
                    'criterion': 'mse',
                    'min_samples_split': 5,
                    'min_samples_leaf': 2,
                    'input_scaled': False,
                    'output_scaled': False}
discrete_actions = mdp.action_space.values

# ExtraTrees
regressor = Regressor(ExtraTreesRegressor, **regressor_params)

# Action regressor of Ensemble of ExtraTreesEnsemble
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor,
                            discrete_actions=discrete_actions,
                            tol=0.5)  # call is truncated in the source; tol value assumed
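# --- Continuation sketch (not in the original snippet) ---
# The script breaks off after building the action regressor. A minimal sketch
# of the usual next steps follows: collect a batch of episodes, split it for
# fitted Q-iteration, and run FQI. The dataset helpers are the same ones used
# in the PBO script below; the FQI import path, constructor keywords, and the
# partial_fit calling convention are assumptions, not confirmed API.
from ifqi.evaluation import evaluation
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.algorithms.fqi import FQI

dataset = evaluation.collect_episodes(mdp, n_episodes=1000)  # n_episodes is illustrative
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

fqi = FQI(estimator=regressor,            # keyword names assumed
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
          gamma=mdp.gamma,
          horizon=mdp.horizon)
fqi.partial_fit(sast, r)                  # first iteration fits on the dataset
for _ in range(mdp.horizon - 1):
    fqi.partial_fit(None, None)           # later iterations assumed to reuse cached data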

import numpy as np

from ifqi import envs
from ifqi.evaluation import evaluation
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.models.regressor import Regressor
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

"""
Simple script to quickly run pbo. It solves the LQG environment.
"""

mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
reward_idx = state_dim + action_dim
discrete_actions = np.linspace(-8, 8, 20)
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

### Q REGRESSOR ##########################
class LQG_Q(object):
    """Hand-coded quadratic Q-model for the 1-D LQG problem,
    parametrized by the weight vector w = [k, b]."""

    def __init__(self):
        self.w = np.array([1., 0.])

    def predict(self, sa):
        # sa[:, 0] is the state, sa[:, 1] the action
        k, b = self.w
        return (- b * b * sa[:, 0] * sa[:, 1]
                - 0.5 * k * sa[:, 1] ** 2
                - 0.4 * k * sa[:, 0] ** 2)
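
# --- Usage sketch (not in the original snippet) ---
# Sanity check of the Q-model above. With the initial weights w = [k, b] = [1, 0]
# the model reduces to Q(s, a) = -0.5 * a**2 - 0.4 * s**2, so the outputs can be
# verified by hand. The (state, action) values are made up for illustration.
q = LQG_Q()
sa_batch = np.array([[1.0, -1.0],
                     [0.5, 2.0]])  # rows are (state, action) pairs
print(q.predict(sa_batch))         # -> [-0.9 -2.1]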