Exemplo n.º 1
0
                    'criterion': 'mse',
                    'min_samples_split': 5,
                    'min_samples_leaf': 2,
                    'input_scaled': False,
                    'output_scaled': False}
discrete_actions = mdp.action_space.values

# ExtraTrees
regressor = Regressor(ExtraTreesRegressor, **regressor_params)

# Action regressor of Ensemble of ExtraTreesEnsemble
# regressor = Ensemble(ExtraTreesRegressor, **regressor_params)
regressor = ActionRegressor(regressor, discrete_actions=discrete_actions,
                            tol=5, **regressor_params)

dataset = evaluation.collect_episodes(mdp, n_episodes=1000)
check_dataset(dataset, state_dim, action_dim, reward_dim) # this is just a
# check, it can be removed in experiments
print('Dataset has %d samples' % dataset.shape[0])

# reward_idx = state_dim + action_dim
# sast = np.append(dataset[:, :reward_idx],
#                  dataset[:, reward_idx + reward_dim:-1],
#                  axis=1)
# r = dataset[:, reward_idx]
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

fqi_iterations = mdp.horizon  # this is usually less than the horizon
fqi = FQI(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
Exemplo n.º 2
0
                                **regressor_params)

# Load FQI
fqi = FQI(estimator=regressor,
          state_dim=state_dim,
          action_dim=action_dim,
          discrete_actions=discrete_actions,
          gamma=config['fqi']['gamma'],
          horizon=config['fqi']['horizon'],
          features=config['fqi']['features'],
          verbose=config['fqi']['verbose'])
fit_params = config['fit_params']

# Load dataset
dataset = evaluation.collect_episodes(
    mdp,
    n_episodes=config['experiment_setting']['evaluation']['n_episodes'][-1])
print('Dataset has %d samples' % dataset.shape[0])

# Load initial state to start evaluation episodes. This is the only setting
# to be chosen outside the configuration file.
# IF MULTIPLE EXPERIMENTS ARE TO BE PERFORMED STARTING FROM THE SAME
# INITIAL STATE, USE AN ARRAY WITH THE SAME INITIAL STATE REPEATED FOR THE
# DESIRED NUMBER OF EVALUATION RUNS.
initial_states = np.zeros((41, 4))
initial_states[:, 0] = np.linspace(-2, 2, 41)
######################################################################
######################################################################

experiment_results = list()
results = list()
Exemplo n.º 3
0
from ifqi.evaluation.utils import check_dataset, split_data_for_fqi
from ifqi.models.regressor import Regressor
from ifqi.models.mlp import MLP
from ifqi.models.linear import Ridge
from ifqi.algorithms.pbo.pbo import PBO

"""
Simple script to quickly run pbo. It solves the LQG environment.

"""

mdp = envs.LQG1D()
state_dim, action_dim, reward_dim = envs.get_space_info(mdp)
reward_idx = state_dim + action_dim
discrete_actions = np.linspace(-8, 8, 20)
dataset = evaluation.collect_episodes(mdp, n_episodes=100)
check_dataset(dataset, state_dim, action_dim, reward_dim)
sast, r = split_data_for_fqi(dataset, state_dim, action_dim, reward_dim)

### Q REGRESSOR ##########################
class LQG_Q():
    def __init__(self):
        self.w = np.array([1., 0.])

    def predict(self, sa):
        k, b = self.w
        #print(k,b)
        return - b * b * sa[:, 0] * sa[:, 1] - 0.5 * k * sa[:, 1] ** 2 - 0.4 * k * sa[:, 0] ** 2

    def get_weights(self):
        return self.w
Exemplo n.º 4
0
    regressor = Ensemble(regressor_class, **regressor_params)
else:
    regressor = Regressor(regressor_class, **regressor_params)
if not config['model']['fit_actions']:
    regressor = ActionRegressor(regressor, discrete_actions=discrete_actions,
                                tol=5, **regressor_params)

results = list()
# Run
for e in range(config['experiment_setting']['evaluation']['n_experiments']):
    print('Experiment: %d' % (e + 1))
    experiment_results = list()

    # Load dataset
    dataset = evaluation.collect_episodes(
        mdp, n_episodes=np.sort(config['experiment_setting']['evaluation']
                                ['n_episodes'])[-1])
    print('Dataset has %d samples' % dataset.shape[0])

    # Load FQI
    fqi = FQI(estimator=regressor,
              state_dim=state_dim,
              action_dim=action_dim,
              discrete_actions=discrete_actions,
              gamma=config['fqi']['gamma'],
              horizon=config['fqi']['horizon'],
              verbose=config['fqi']['verbose'])
    fit_params = config['fit_params']

    if config['experiment_setting']['evaluation']['metric'] == 'n_episodes':
        for i in config['experiment_setting']['evaluation']['n_episodes']: