Example #1
from trlib.policies.valuebased import EpsilonGreedy
from trlib.policies.qfunction import ZeroQ
from trlib.algorithms.reinforcement.fqi import FQI
from sklearn.ensemble import ExtraTreesRegressor  # the private 'sklearn.ensemble.forest' path was removed in scikit-learn 0.24
from trlib.experiments.results import Result
from trlib.experiments.visualization import plot_average
from trlib.algorithms.callbacks import get_callback_list_entry
import numpy as np
from trlib.experiments.experiment import RepeatExperiment
from trlib.environments.puddleworld import PuddleWorld
from trlib.utilities.data import save_object
from trlib.utilities.evaluation import evaluate_policy

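""" --- ENVIRONMENTS --- """
# Three source PuddleWorld tasks (different goal positions and puddle
# layouts) whose samples are later transferred to the target task of
# Example #2. (Section header added here to match Example #2's convention.)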
source_mdp_1 = PuddleWorld(goal_x=5, goal_y=10, puddle_slow=False)
source_mdp_2 = PuddleWorld(goal_x=5,
                           goal_y=10,
                           puddle_means=[(2.0, 2.0), (4.0, 6.0), (1.0, 8.0),
                                         (2.0, 4.0), (8.5, 7.0), (8.5, 5.0)],
                           puddle_var=[(.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8)],
                           puddle_slow=False)
source_mdp_3 = PuddleWorld(goal_x=7,
                           goal_y=10,
                           puddle_means=[(8.0, 2.0), (1.0, 10.0), (1.0, 8.0),
                                         (6.0, 6.0), (6.0, 4.0)],
                           # NOTE: the original listing is truncated at this
                           # point; the remaining covariance entries and the
                           # closing arguments are reconstructed by analogy
                           # with source_mdp_2 above and the target MDP of
                           # Example #2.
                           puddle_var=[(.7, 1.e-5, 1.e-5, .7),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8),
                                       (.8, 1.e-5, 1.e-5, .8)],
                           puddle_slow=False)
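# (Example #1 ends here in the original, truncated. Presumably each source
# MDP is then solved with FQI, evaluated with evaluate_policy, and its
# transition samples saved via save_object under names like "source_data_1",
# which is exactly what Example #2 loads below.)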
Example #2
from trlib.policies.valuebased import EpsilonGreedy
from trlib.policies.qfunction import ZeroQ
from sklearn.ensemble import ExtraTreesRegressor  # the private 'sklearn.ensemble.forest' path was removed in scikit-learn 0.24
from trlib.algorithms.callbacks import get_callback_list_entry
import numpy as np
from trlib.experiments.experiment import RepeatExperiment
from trlib.utilities.data import load_object
from trlib.algorithms.transfer.wfqi import WFQI, estimate_weights_mean
from sklearn.gaussian_process.kernels import RBF, WhiteKernel
from trlib.environments.puddleworld import PuddleWorld
from trlib.utilities.wfqi_utils import estimate_ideal_weights
from trlib.policies.policy import Uniform

""" --- ENVIRONMENTS --- """
target_mdp = PuddleWorld(goal_x=5,
                         goal_y=10,
                         puddle_means=[(1.0, 4.0), (1.0, 10.0), (1.0, 8.0),
                                       (6.0, 6.0), (6.0, 4.0)],
                         puddle_var=[(.7, 1.e-5, 1.e-5, .7),
                                     (.8, 1.e-5, 1.e-5, .8),
                                     (.8, 1.e-5, 1.e-5, .8),
                                     (.8, 1.e-5, 1.e-5, .8),
                                     (.8, 1.e-5, 1.e-5, .8)],
                         puddle_slow=False)

actions = [0, 1, 2, 3]
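# Pickled sample sets from the three source tasks, presumably produced by a
# data-collection run like Example #1.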
source_data = [load_object("source_data_" + str(i)) for i in [1, 2, 3]]

""" --- PARAMS --- """

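# Uniform random policy over the four actions, presumably used to collect the
# initial batch of samples on the target task.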
uniform_policy = Uniform(actions)

regressor_params = {'n_estimators': 50,
                    'criterion': 'squared_error',  # renamed from 'mse' in scikit-learn 1.0
                    'min_samples_split': 2,
                    'min_samples_leaf': 1}

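# Start states for evaluation rollouts: five episodes from the corner (0, 0),
# presumably fed to an evaluation callback.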
initial_states = [np.array([0.,0.]) for _ in range(5)]
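
# The listing stops before the WFQI algorithm itself is built. A hedged sketch
# of the usual next steps (everything below is an assumption inferred from the
# imports above, not verified trlib code): build GP kernels for the reward and
# state models, estimate source-sample weights (e.g. with estimate_weights_mean
# or estimate_ideal_weights), then instantiate WFQI on target_mdp with
# source_data and the ExtraTreesRegressor parameters and run it inside a
# RepeatExperiment. Only the kernel construction is shown, since that part is
# plain scikit-learn API; the variable names and hyperparameters are placeholders.
kernel_rw = RBF(length_scale=1.0) + WhiteKernel(noise_level=1.0)  # reward-model kernel (placeholder values)
kernel_st = RBF(length_scale=1.0) + WhiteKernel(noise_level=1.0)  # state-model kernel (placeholder values)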