# Action regressor of Ensemble of ExtraTreesEnsemble # regressor = Ensemble(ExtraTreesRegressor, **regressor_params) regressor = ActionRegressor(regressor, discrete_actions=discrete_actions, tol=5, **regressor_params) dataset = evaluation.collect_episodes(mdp, n_episodes=1000) check_dataset(dataset, state_dim, action_dim, reward_dim) # this is just a # check, it can be removed in experiments print('Dataset has %d samples' % dataset.shape[0]) # reward_idx = state_dim + action_dim # sast = np.append(dataset[:, :reward_idx], # dataset[:, reward_idx + reward_dim:-1], # axis=1) # r = dataset[:, reward_idx] sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim) fqi_iterations = mdp.horizon # this is usually less than the horizon fqi = FQI(estimator=regressor, state_dim=state_dim, action_dim=action_dim, discrete_actions=discrete_actions, gamma=mdp.gamma, horizon=fqi_iterations, verbose=True) fit_params = {} # fit_params = { # "n_epochs": 300, # "batch_size": 50, # "validation_split": 0.1,
# NOTE(review): this `]` closes an assignment (presumably
# `selected_actions = [...]`) that starts before this chunk — see the caller/file head.
]  # Assuming monodimensional, discrete action space

# Locate the single action column in the dataset header; every column
# before it is treated as a selected state feature.
action_idx = header.index('A0')
selected_states = header[:action_idx]  # All states were selected
# NOTE(review): if `reduced_dataset` is a 2-D ndarray this indexes a ROW,
# not the action column — a column select would be reduced_dataset[:, action_idx].
# Verify against how reduced_dataset is built.
selected_actions_values = np.unique(reduced_dataset[action_idx])
print('Reduced dataset has %d samples' % reduced_dataset.shape[0])
print('Selected states: %s' % selected_states)
print('Selected actions: %s' % selected_actions)

### FQI ###
if args.fqi:
    # Dataset parameters for FQI
    selected_states_dim = len(selected_states)
    selected_actions_dim = 1  # Assuming monodimensional, discrete action space

    # Split dataset for FQI
    sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim,
                                 selected_actions_dim, reward_dim)

    # Action regressor of ExtraTreesRegressor for FQI
    # (input_scaled/output_scaled/n_jobs are consumed by ifqi's Regressor
    # wrapper; the rest are forwarded to sklearn's ExtraTreesRegressor —
    # presumed split, confirm against ifqi.models.regressor.)
    fqi_regressor_params = {
        'n_estimators': 50,
        'criterion': 'mse',
        'min_samples_split': 5,
        'min_samples_leaf': 2,
        'input_scaled': False,
        'output_scaled': False,
        'n_jobs': args.njobs
    }
    regressor = Regressor(regressor_class=ExtraTreesRegressor,
                          **fqi_regressor_params)
    # NOTE(review): this call is cut off in this chunk — the remaining
    # keyword arguments (tol, **params) continue past the visible span.
    regressor = ActionRegressor(regressor, discrete_actions=selected_actions_values,
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

"""
Simple script to quickly run pbo. It solves the LQG environment.
"""

# Build the 1-D Linear-Quadratic-Gaussian control problem and read its
# state/action/reward dimensionalities from the environment helpers.
mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
reward_idx = state_dim + action_dim
# Discretize the (continuous) action space into 20 evenly spaced actions.
discrete_actions = np.linspace(-8, 8, 20)
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
# sast = (state, action, next-state, terminal) matrix, r = reward column
# — presumed layout; confirm against split_data_for_fqi.
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

### Q REGRESSOR ##########################
class LQG_Q():
    # Hand-rolled quadratic Q-function model for the LQG task, exposing the
    # predict/get_weights interface the PBO code expects.
    # Weight vector self.w = [k, b].

    def __init__(self):
        self.w = np.array([1., 0.])

    def predict(self, sa):
        """Return Q(s, a) = -b^2*s*a - 0.5*k*a^2 - 0.4*k*s^2 for each row of `sa`.

        `sa` is a 2-D array whose column 0 is presumably the state and
        column 1 the action — TODO confirm against the caller.
        """
        k, b = self.w
        #print(k,b)
        return - b * b * sa[:, 0] * sa[:, 1] - 0.5 * k * sa[:, 1] ** 2 - 0.4 * k * sa[:, 0] ** 2

    def get_weights(self):
        """Return the current weight vector [k, b]."""
        return self.w

    def get_k(self, omega):  # NOTE(review): body continues beyond this chunk
selected_actions = ['A0'] # Assuming monodimensional, discrete action space action_idx = header.index('A0') selected_states = header[:action_idx] # All states were selected selected_actions_values = np.unique(reduced_dataset[action_idx]) print('Reduced dataset has %d samples' % reduced_dataset.shape[0]) print('Selected states: %s' % selected_states) print('Selected actions: %s' % selected_actions) ### FQI ### if args.fqi: # Dataset parameters for FQI selected_states_dim = len(selected_states) selected_actions_dim = 1 # Assuming monodimensional, discrete action space # Split dataset for FQI sast, r = split_data_for_fqi(reduced_dataset, selected_states_dim, selected_actions_dim, reward_dim) # Action regressor of ExtraTreesRegressor for FQI fqi_regressor_params = {'n_estimators': 50, 'criterion': 'mse', 'min_samples_split': 5, 'min_samples_leaf': 2, 'input_scaled': False, 'output_scaled': False, 'n_jobs': args.njobs} regressor = Regressor(regressor_class=ExtraTreesRegressor, **fqi_regressor_params) regressor = ActionRegressor(regressor, discrete_actions=selected_actions_values, tol=0.5, **fqi_regressor_params)