from trlib.environments.dam import Dam from trlib.policies.valuebased import EpsilonGreedy from trlib.policies.qfunction import ZeroQ from trlib.algorithms.reinforcement.fqi import FQI from sklearn.ensemble.forest import ExtraTreesRegressor from trlib.experiments.results import Result from trlib.experiments.visualization import plot_average from trlib.algorithms.callbacks import get_callback_list_entry import numpy as np from trlib.experiments.experiment import RepeatExperiment from trlib.environments.puddleworld import PuddleWorld from trlib.utilities.data import save_object from trlib.utilities.evaluation import evaluate_policy source_mdp_1 = PuddleWorld(goal_x=5, goal_y=10, puddle_slow=False) source_mdp_2 = PuddleWorld(goal_x=5, goal_y=10, puddle_means=[(2.0, 2.0), (4.0, 6.0), (1.0, 8.0), (2.0, 4.0), (8.5, 7.0), (8.5, 5.0)], puddle_var=[(.8, 1.e-5, 1.e-5, .8), (.8, 1.e-5, 1.e-5, .8), (.8, 1.e-5, 1.e-5, .8), (.8, 1.e-5, 1.e-5, .8), (.8, 1.e-5, 1.e-5, .8), (.8, 1.e-5, 1.e-5, .8)], puddle_slow=False) source_mdp_3 = PuddleWorld(goal_x=7, goal_y=10, puddle_means=[(8.0, 2.0), (1.0, 10.0), (1.0, 8.0), (6.0, 6.0), (6.0, 4.0)], puddle_var=[(.7, 1.e-5, 1.e-5, .7),
# Setup for a transfer experiment on PuddleWorld: builds the target task,
# loads three previously saved source datasets, and defines the parameters
# shared by the code that follows.

import numpy as np
# Import from the public package path: the private ``sklearn.ensemble.forest``
# module was deprecated in scikit-learn 0.22 and removed in 0.24, while
# ``sklearn.ensemble`` exposes ExtraTreesRegressor in old and new releases.
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

from trlib.algorithms.callbacks import get_callback_list_entry
from trlib.algorithms.transfer.wfqi import WFQI, estimate_weights_mean
from trlib.environments.dam import Dam
from trlib.environments.puddleworld import PuddleWorld
from trlib.experiments.experiment import RepeatExperiment
from trlib.policies.policy import Uniform
from trlib.policies.qfunction import ZeroQ
from trlib.policies.valuebased import EpsilonGreedy
from trlib.utilities.data import load_object
from trlib.utilities.wfqi_utils import estimate_ideal_weights

""" --- ENVIRONMENTS --- """

# Target task: goal at (5, 10) with five puddles; each puddle_var entry is
# paired positionally with the puddle_means entry at the same index.
target_mdp = PuddleWorld(
    goal_x=5,
    goal_y=10,
    puddle_means=[
        (1.0, 4.0),
        (1.0, 10.0),
        (1.0, 8.0),
        (6.0, 6.0),
        (6.0, 4.0),
    ],
    puddle_var=[
        (.7, 1.e-5, 1.e-5, .7),
        (.8, 1.e-5, 1.e-5, .8),
        (.8, 1.e-5, 1.e-5, .8),
        (.8, 1.e-5, 1.e-5, .8),
        (.8, 1.e-5, 1.e-5, .8),
    ],
    puddle_slow=False,
)

# Discrete action identifiers handed to the uniform policy below.
actions = [0, 1, 2, 3]

# Load the objects stored under "source_data_1", "source_data_2" and
# "source_data_3" (presumably written by the source-task script -- verify).
source_data = [load_object("source_data_" + str(i)) for i in [1, 2, 3]]

""" --- PARAMS --- """

uniform_policy = Uniform(actions)

# NOTE(review): 'mse' was renamed to 'squared_error' in scikit-learn 1.0 and
# the old spelling was removed in 1.2. It is kept unchanged here so the script
# still runs on the older scikit-learn it was written for -- confirm the
# target version before renaming.
regressor_params = {
    'n_estimators': 50,
    'criterion': 'mse',
    'min_samples_split': 2,
    'min_samples_leaf': 1,
}

# Five independent copies of the start state (0, 0); each entry is its own
# array object, so mutating one does not affect the others.
initial_states = [np.array([0., 0.]) for _ in range(5)]