Пример #1
0
 def __init__(self, env_spec, dynamics_model: ModelEnsemble,
              model_free_algo: ModelFreeAlgo,
              config_or_config_dict: (DictConfig, dict),
              name='sample_with_dynamics'
              ):
     """
     Set up the algorithm from a dynamics-model ensemble and a model-free algorithm.

     :param env_spec: environment specification, forwarded to the parent constructor
     :param dynamics_model: ensemble of dynamics models; its first member is type-checked
     :type dynamics_model: ModelEnsemble
     :param model_free_algo: model-free algorithm kept on the instance
     :type model_free_algo: ModelFreeAlgo
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param name: name forwarded to the parent constructor
     :type name: str
     :raises TypeError: when the first ensemble member is not a ContinuousMLPGlobalDynamicsModel
     """
     # Only the first member is inspected; the other members are not checked here.
     leading_model = dynamics_model.model[0]
     if not isinstance(leading_model, ContinuousMLPGlobalDynamicsModel):
         raise TypeError("Model ensemble elements should be of type ContinuousMLPGlobalDynamicsModel")
     super().__init__(env_spec, dynamics_model, name)
     config = construct_dict_config(config_or_config_dict, self)
     parameters = Parameters(parameters={},
                             name='dyna_param',
                             source_config=config)
     # Collected for the members that are PlaceholderInput instances; nothing in
     # this constructor reads the list afterwards.
     sub_placeholder_input_list = [
         dict(obj=member, attr_name=attr_name)
         for member, attr_name in ((dynamics_model, 'dynamics_model'),
                                   (model_free_algo, 'model_free_algo'))
         if isinstance(member, PlaceholderInput)
     ]
     self.model_free_algo = model_free_algo
     self.config = config
     self.parameters = parameters
     self.result = []
     # len(dynamics_model) zero-valued entries.
     self.validation_result = [0 for _ in range(len(dynamics_model))]
     # Force the class of the object held in self._dynamics_model
     # (presumably set by the parent constructor -- TODO confirm).
     self._dynamics_model.__class__ = ModelEnsemble
Пример #2
0
    def __init__(self,
                 train_sample_count_func,
                 config_or_config_dict: (DictConfig, dict),
                 func_dict: dict
                 ):
        """
        Constructor: load the run settings from the config and reset the bookkeeping.

        :param train_sample_count_func: optional callable; it is only checked for callability here
        :type train_sample_count_func: callable or None
        :param config_or_config_dict: configuration handed to construct_dict_config; the
            resulting parameters are queried for 'env', 'agent', 'cyber', 'total_steps',
            'max_step_per_episode', 'train_after_step', 'train_every_step',
            'test_after_step', 'test_every_step' and 'num_test'
        :type config_or_config_dict: DictConfig or dict
        :param func_dict: function dict forwarded to the parent constructor
        :type func_dict: dict
        """
        super(DDPG_TrainTestFlow, self).__init__(func_dict=func_dict)
        flow_config = construct_dict_config(config_or_config_dict, obj=self)
        self.parameters = Parameters(source_config=flow_config, parameters={})
        if train_sample_count_func:
            # A falsy value is accepted without any check.
            assert callable(train_sample_count_func)

        self.env = self.parameters('env')
        self.env_spec = self.env.env_spec
        # The remaining settings map one-to-one from parameter key to attribute name.
        for key in ('agent', 'cyber', 'total_steps', 'max_step_per_episode',
                    'train_after_step', 'train_every_step',
                    'test_after_step', 'test_every_step', 'num_test'):
            setattr(self, key, self.parameters(key))
        self.test_reward = []
        self.data_sample = []
        self.step_counter = SinglentonStepCounter(-1)
Пример #3
0
    def __init__(
            self,
            train_sample_count_func,
            config_or_config_dict: (DictConfig, dict),
            func_dict: dict,
    ):
        """
        Constructor: initialise the parent flow once and reset all schedule markers.

        :param train_sample_count_func: a function indicating how many training samples the agent has collected so far.
        :type train_sample_count_func: method
        :param config_or_config_dict: a Config or a dict that should have the keys: (TEST_EVERY_SAMPLE_COUNT, TRAIN_EVERY_SAMPLE_COUNT, START_TRAIN_AFTER_SAMPLE_COUNT, START_TEST_AFTER_SAMPLE_COUNT)
        :type config_or_config_dict: Config or dict
        :param func_dict: function dict, holds the keys: 'sample', 'train', 'test'. Each item in the dict should also be a dict that holds the keys 'func', 'args', 'kwargs'
        :type func_dict: dict
        :raises AssertionError: if train_sample_count_func is not callable
        """
        # Reject an unusable sample counter before any state is touched.
        assert callable(train_sample_count_func)
        # Initialise the parent exactly once; a second call would only repeat the same setup.
        super().__init__(func_dict=func_dict)
        config = construct_dict_config(config_or_config_dict, obj=self)
        self.parameters = Parameters(source_config=config, parameters=dict())
        self.time_step_func = train_sample_count_func
        # Every marker starts at -1.
        self._last_train_algo_point = -1
        self._last_train_algo_point_from_dynamics = -1
        self._last_test_algo_point = -1
        self._last_train_dynamics_point = -1
        self._last_test_dynamics_point = -1
Пример #4
0
 def __init__(self, env_spec: EnvSpec, config_or_config_dict: (DictConfig, dict), name='policy'):
     """
     Build the policy from a config whose 'ACTION_VALUE' entry must lie in the action space.

     :param env_spec: environment specification; its action_space validates 'ACTION_VALUE'
     :type env_spec: EnvSpec
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param name: name forwarded to the parent constructor
     :type name: str
     """
     config = construct_dict_config(config_or_config_dict, self)
     policy_parameters = Parameters(
         parameters={},
         source_config=config,
     )
     # The configured action has to be contained in the environment's action space.
     assert env_spec.action_space.contains(x=config('ACTION_VALUE'))
     super().__init__(env_spec, policy_parameters, name)
     self.config = config
Пример #5
0
 def __init__(self,
              env_spec: EnvSpec,
              batch_data: TransitionData = None,
              epsilon=inf,
              init_sequential=False,
              eigreg=False,
              warmstart=True,
              name_scope='gp_dynamics_model',
              min_samples_per_cluster=40,
              max_clusters=20,
              strength=1,
              name='gp_dynamics_model'):
     """
     Create the dynamics model together with its GMM instance.

     :param env_spec: environment specification forwarded to the parent constructor
     :type env_spec: EnvSpec
     :param batch_data: transition data kept on the instance (may be None)
     :type batch_data: TransitionData
     :param epsilon: passed to GMM only; it is not stored in the parameters
     :param init_sequential: stored in the parameters and passed to GMM
     :param eigreg: stored in the parameters and passed to GMM
     :param warmstart: stored in the parameters and passed to GMM
     :param name_scope: kept on the instance as name_scope
     :param min_samples_per_cluster: stored under the parameter key 'min_samp'
     :param max_clusters: stored under the parameter key 'max_clusters'
     :param strength: stored under the parameter key 'strength'
     :param name: name forwarded to the parent constructor
     """
     # Options shared by the parameter table and the GMM constructor.
     gmm_options = dict(init_sequential=init_sequential,
                        eigreg=eigreg,
                        warmstart=warmstart)
     # 'max_samples' is always registered as inf.
     model_parameters = Parameters(
         dict(min_samp=min_samples_per_cluster,
              max_samples=inf,
              max_clusters=max_clusters,
              strength=strength,
              **gmm_options))
     super().__init__(env_spec=env_spec, parameters=model_parameters, name=name)
     self.name_scope = name_scope
     self.batch_data = batch_data
     self.gmm_model = GMM(epsilon=epsilon, **gmm_options)
     self.X = None
     self.U = None
Пример #6
0
 def create_parameters(self):
     """
     Build a Parameters fixture named 'test_params'.

     :return: a tuple of the Parameters instance and the dict returned by locals(),
         i.e. every local name of this method (including ``self``) mapped to its value
     """
     # Mixed value types: a string, an int and a random 4x2 numpy array.
     parameters = dict(param1='aaaa',
                       param2=12312,
                       param3=np.random.random([4, 2]))
     # Only the first element returned by create_dict_config() is used.
     source_config, _ = self.create_dict_config()
     a = Parameters(parameters=parameters,
                    source_config=source_config,
                    name='test_params')
     # NOTE: the local variable names are part of the return value via locals();
     # renaming any of them changes the keys callers see.
     return a, locals()
Пример #7
0
 def __init__(self, env_spec: EnvSpec, T: int, cost_fn: CostFunc,
              dynamics: LinearDynamicsModel):
     """
     Create the policy and the LQR instance it relies on.

     :param env_spec: environment specification, given to the parent constructor and to LQR
     :type env_spec: EnvSpec
     :param T: value stored under the parameter key 'T' and passed to LQR as ``T``
     :type T: int
     :param cost_fn: cost function passed to LQR
     :type cost_fn: CostFunc
     :param dynamics: dynamics model kept on the instance and passed to LQR as ``dyna_model``
     :type dynamics: LinearDynamicsModel
     """
     super().__init__(env_spec, Parameters(parameters={'T': T}))
     self.dynamics = dynamics
     # Read T back through self.parameters rather than using the argument directly.
     stored_T = self.parameters('T')
     self.Lqr_instance = LQR(
         env_spec=env_spec,
         T=stored_T,
         dyna_model=dynamics,
         cost_fn=cost_fn,
     )
Пример #8
0
    def __init__(self, action_space: Space, init_random_prob: float, prob_scheduler: Schedule = None):
        """
        Constructor

        :param action_space: action space kept on the instance
        :type action_space: Space
        :param init_random_prob: value returned by the probability function when no scheduler is given
        :type init_random_prob: float
        :param prob_scheduler: optional scheduler; when truthy, its ``value`` method becomes the probability function
        :type prob_scheduler: Schedule
        """
        super(ExplorationStrategy, self).__init__()

        self.action_space = action_space
        # A truthy scheduler wins; otherwise fall back to the constant initial probability.
        probability_source = prob_scheduler.value if prob_scheduler else (lambda: init_random_prob)
        self.random_prob_func = probability_source

        self.parameters = Parameters(
            parameters={'random_prob_func': self.random_prob_func},
            name='eps_greedy_params',
        )
Пример #9
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: (Env, Wrapper),
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            reset_noise_every_terminal_state=False,
            reset_state_every_sample=False,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Constructor

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with the agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and environment space
        :type env_spec: EnvSpec
        :param sampler: sampler; a default ``Sampler()`` is created when None
        :type sampler: Sampler
        :param noise_adder: adds action noise for exploration in the action space
        :type noise_adder: AgentActionNoiseWrapper
        :param reset_noise_every_terminal_state: reset the noise every sampled trajectory
        :type reset_noise_every_terminal_state: bool
        :param reset_state_every_sample: reset the state every time a sample/rollout is performed
        :type reset_state_every_sample: bool
        :param exploration_strategy: exploration strategy in the action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: controls the schedule of the varying parameters in the training process
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        # Both reset flags are exposed through the parameter table.
        self.parameters = Parameters(parameters={
            'reset_noise_every_terminal_state': reset_noise_every_terminal_state,
            'reset_state_every_sample': reset_state_every_sample,
        })
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.sampler = Sampler() if sampler is None else sampler
        self.recorder = Recorder(default_obj=self)
        self.env_spec = env_spec
        # A falsy strategy is normalised to None; a truthy one must have the right type.
        strategy = None
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            strategy = exploration_strategy
        self.explorations_strategy = strategy
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #10
0
 def __init__(self,
              env_spec: EnvSpec,
              state_transition_matrix: np.array,
              bias: np.array,
              init_state=None,
              name='dynamics_model'):
     """
     Create the dynamics model from a transition matrix and a bias vector.

     :param env_spec: environment specification; supplies the flat observation/action dimensions
     :type env_spec: EnvSpec
     :param state_transition_matrix: stored under the parameter key 'F'; its shape must be
         (obs_dim, obs_dim + action_dim)
     :type state_transition_matrix: np.array
     :param bias: stored under the parameter key 'f'; its first dimension must equal obs_dim
     :type bias: np.array
     :param init_state: forwarded to the parent constructor
     :param name: name forwarded to the parent constructor
     :type name: str
     """
     model_parameters = Parameters(parameters={'F': state_transition_matrix, 'f': bias})
     super().__init__(env_spec, model_parameters, init_state, name)
     # Shape checks run against the values as stored in self.parameters.
     assert self.parameters('F').shape == (
         env_spec.obs_space.flat_dim,
         env_spec.obs_space.flat_dim + env_spec.action_space.flat_dim,
     )
     assert self.parameters('f').shape[0] == env_spec.obs_space.flat_dim
Пример #11
0
 def __init__(
         self,
         train_sample_count_func,
         config_or_config_dict: (DictConfig, dict),
         func_dict: dict,
 ):
     """
     Constructor

     :param train_sample_count_func: callable kept on the instance as ``time_step_func``
     :type train_sample_count_func: method
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param func_dict: function dict forwarded to the parent constructor
     :type func_dict: dict
     """
     super(TrainTestFlow, self).__init__(func_dict=func_dict)
     flow_config = construct_dict_config(config_or_config_dict, obj=self)
     self.parameters = Parameters(parameters={}, source_config=flow_config)
     self.time_step_func = train_sample_count_func
     # Both markers start at -1.
     self.last_train_point = -1
     self.last_test_point = -1
     assert callable(train_sample_count_func)
Пример #12
0
    def __init__(self, env_spec: EnvSpec, name: str = 'algo'):
        """
        Create the algorithm with an empty parameter table and a fresh recorder.

        :param env_spec: environment specifications
        :type env_spec: EnvSpec
        :param name: name of the algorithm
        :type name: str
        """
        # The status object is bound to this instance before the parent is initialised.
        status = StatusWithSubInfo(obj=self)
        super().__init__(status=status, name=name)
        self.env_spec = env_spec
        self.parameters = Parameters({})
        self.recorder = Recorder()
Пример #13
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Constructor

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with the agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and environment space
        :type env_spec: EnvSpec
        :param sampler: sampler; when falsy, a Sampler named '<name>_sampler' is created from env_spec
        :type sampler: Sampler
        :param noise_adder: adds action noise for exploration in the action space
        :type noise_adder: AgentActionNoiseWrapper
        :param exploration_strategy: exploration strategy in the action space
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: controls the schedule of the varying parameters in the training process
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            # A falsy strategy is normalised to None.
            self.explorations_strategy = None
        # Single assignment of the sampler: use the supplied one, or build a
        # default bound to this agent's env_spec and name.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #14
0
 def __init__(
     self,
     env_spec,
     dynamics_model: DynamicsModel,
     config_or_config_dict: (DictConfig, dict),
     policy: Policy,
     name='mpc',
 ):
     """
     Constructor

     :param env_spec: environment specification, given to the parent and to the transition memory
     :param dynamics_model: dynamics model forwarded to the parent constructor
     :type dynamics_model: DynamicsModel
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param policy: policy kept on the instance
     :type policy: Policy
     :param name: instance name; also the prefix of the parameter table name
     :type name: str
     """
     super().__init__(env_spec, dynamics_model, name)
     self.config = construct_dict_config(config_or_config_dict, self)
     self.policy = policy
     # The parameter table is named '<name>_mpc_param'.
     param_name = name + '_' + 'mpc_param'
     self.parameters = Parameters(parameters={},
                                  source_config=self.config,
                                  name=param_name)
     self.memory = TransitionData(env_spec=env_spec)
Пример #15
0
 def __init__(self, env_spec: EnvSpec, T: int, delta: float, iteration: int, cost_fn: CostFunc,
              dynamics_model_train_iter: int,
              dynamics: DynamicsEnvWrapper):
     """
     Create the policy and the iLQR instance it relies on.

     :param env_spec: environment specification, given to the parent constructor and to iLQR
     :type env_spec: EnvSpec
     :param T: stored under the parameter key 'T' and passed to iLQR as ``T``
     :type T: int
     :param delta: stored under the parameter key 'delta' and passed to iLQR as ``delta``
     :type delta: float
     :param iteration: stored under the parameter key 'iteration'
     :type iteration: int
     :param cost_fn: cost function passed to iLQR
     :type cost_fn: CostFunc
     :param dynamics_model_train_iter: stored under the parameter key 'dynamics_model_train_iter'
     :type dynamics_model_train_iter: int
     :param dynamics: wrapper kept on the instance; its ``_dynamics`` attribute is handed to iLQR
     :type dynamics: DynamicsEnvWrapper
     """
     settings = {
         'T': T,
         'delta': delta,
         'iteration': iteration,
         'dynamics_model_train_iter': dynamics_model_train_iter,
     }
     super().__init__(env_spec, Parameters(parameters=settings))
     self.dynamics = dynamics
     self.U_hat = None
     self.X_hat = None
     # delta and T are read back through self.parameters, in that order.
     self.iLqr_instance = iLQR(
         env_spec=env_spec,
         delta=self.parameters('delta'),
         T=self.parameters('T'),
         dyn_model=dynamics._dynamics,
         cost_fn=cost_fn,
     )
Пример #16
0
 def __init__(self, env_spec,
              dynamics_model: DynamicsModel,
              config_or_config_dict: (DictConfig, dict),
              policy: Policy,
              name='mpc',
              ):
     """
     Constructor

     :param env_spec: environment specification, given to the parent and to the transition memory
     :param dynamics_model: dynamics model forwarded to the parent constructor
     :type dynamics_model: DynamicsModel
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param policy: policy kept on the instance
     :type policy: Policy
     :param name: instance name; also the prefix of the parameter table name
     :type name: str
     """
     super().__init__(env_spec, dynamics_model, name)
     self.config = construct_dict_config(config_or_config_dict, self)
     # The parameter table is named '<name>_mpc_param'.
     param_name = name + '_' + 'mpc_param'
     self.parameters = Parameters(
         parameters={},
         source_config=self.config,
         name=param_name,
     )
     self.policy = policy
     # TODO: 9.18 memory should also be accepted as an __init__ argument,
     #  with a Transition default set up in init()
     self.memory = TransitionData(env_spec=env_spec)
Пример #17
0
    def __init__(self,
                 env_spec: EnvSpec,
                 name: str = 'algo',
                 warm_up_trajectories_number=0):
        """
        Create the algorithm with an empty parameter table and a recorder bound to it.

        :param env_spec: environment specifications
        :type env_spec: EnvSpec
        :param name: name of the algorithm
        :type name: str
        :param warm_up_trajectories_number: how many trajectories are used to warm up the training
        :type warm_up_trajectories_number: int
        """
        # The status object is bound to this instance before the parent is initialised.
        status = StatusWithSubInfo(obj=self)
        super().__init__(status=status, name=name)
        self.env_spec = env_spec
        self.parameters = Parameters({})
        self.recorder = Recorder(default_obj=self)
        self.warm_up_trajectories_number = warm_up_trajectories_number
Пример #18
0
 def __init__(
         self,
         train_sample_count_func,
         config_or_config_dict: (DictConfig, dict),
         func_dict: dict,
 ):
     """
     Constructor: initialise the parent flow once and reset all bookkeeping fields.

     :param train_sample_count_func: callable kept on the instance as ``time_step_func``
     :type train_sample_count_func: method
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param func_dict: function dict forwarded to the parent constructor
     :type func_dict: dict
     :raises AssertionError: if train_sample_count_func is not callable
     """
     # Reject an unusable sample counter before any state is touched.
     assert callable(train_sample_count_func)
     # Initialise the parent exactly once; a second call would only repeat the same setup.
     super().__init__(func_dict=func_dict)
     config = construct_dict_config(config_or_config_dict, obj=self)
     self.parameters = Parameters(source_config=config, parameters=dict())
     self.time_step_func = train_sample_count_func
     # Schedule markers start at -1.
     self._last_train_algo_point = -1
     self._start_train_algo_point_from_dynamics = -1
     self._last_test_algo_point = -1
     self._start_train_dynamics_point = -1
     self._last_test_dynamics_point = -1
     # Remaining bookkeeping fields start at 0.
     self._last_performance = 0
     self._last_chance = 0
     self._fictitious_set_count = 0
Пример #19
0
 def __init__(self, env_spec, dynamics_model: DynamicsModel,
              model_free_algo: ModelFreeAlgo,
              config_or_config_dict: (DictConfig, dict),
              name='sample_with_dynamics'
              ):
     """
     Set up the algorithm from a dynamics model and a model-free algorithm.

     :param env_spec: environment specification, forwarded to the parent constructor
     :param dynamics_model: dynamics model forwarded to the parent constructor
     :type dynamics_model: DynamicsModel
     :param model_free_algo: model-free algorithm kept on the instance
     :type model_free_algo: ModelFreeAlgo
     :param config_or_config_dict: configuration handed to construct_dict_config
     :type config_or_config_dict: DictConfig or dict
     :param name: name forwarded to the parent constructor
     :type name: str
     """
     super().__init__(env_spec, dynamics_model, name)
     config = construct_dict_config(config_or_config_dict, self)
     parameters = Parameters(parameters={},
                             name='dyna_param',
                             source_config=config)
     # Collected for the members that are PlaceholderInput instances; nothing in
     # this constructor reads the list afterwards.
     sub_placeholder_input_list = [
         dict(obj=member, attr_name=attr_name)
         for member, attr_name in ((dynamics_model, 'dynamics_model'),
                                   (model_free_algo, 'model_free_algo'))
         if isinstance(member, PlaceholderInput)
     ]
     self.model_free_algo = model_free_algo
     self.config = config
     self.parameters = parameters
Пример #20
0
    def __init__(
            self,
            name,
            # config_or_config_dict: (DictConfig, dict),
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventSchedule = None):
        """
        Constructor

        :param name: name of the agent instance; also used to name the default sampler
        :param env: environment kept on the instance
        :param algo: algorithm kept on the instance
        :param env_spec: environment specification kept on the instance and given to the default sampler
        :param sampler: sampler; when falsy, a Sampler named '<name>_sampler' is created from env_spec
        :param noise_adder: action-noise wrapper kept on the instance
        :param exploration_strategy: optional strategy; a truthy value must be an ExplorationStrategy
        :param algo_saving_scheduler: scheduler kept on the instance
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            self.explorations_strategy = exploration_strategy
        else:
            # A falsy strategy is normalised to None.
            self.explorations_strategy = None
        # Single assignment of the sampler: use the supplied one, or build a
        # default bound to this agent's env_spec and name.
        self.sampler = sampler if sampler else Sampler(
            env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #21
0
    def __init__(self,
                 train_sample_count_func,
                 config_or_config_dict: (DictConfig, dict),
                 func_dict: dict,
                 ):
        """
        Constructor of TrainTestFlow

        :param train_sample_count_func: a function reporting how many training samples the agent has collected so far; kept as ``time_step_func``.
        :type train_sample_count_func: method
        :param config_or_config_dict: a Config or a dict that should have the keys: (TEST_EVERY_SAMPLE_COUNT, TRAIN_EVERY_SAMPLE_COUNT, START_TRAIN_AFTER_SAMPLE_COUNT, START_TEST_AFTER_SAMPLE_COUNT)
        :type config_or_config_dict: Config or dict
        :param func_dict: function dict, holds the keys: 'sample', 'train', 'test'. Each item in the dict should also be a dict that holds the keys 'func', 'args', 'kwargs'
        :type func_dict: dict
        """
        super(TrainTestFlow, self).__init__(func_dict=func_dict)
        flow_config = construct_dict_config(config_or_config_dict, obj=self)
        # Hyper-parameter instance backed by the flow configuration.
        self.parameters = Parameters(parameters={}, source_config=flow_config)
        self.time_step_func = train_sample_count_func
        # Both markers start at -1.
        self.last_train_point = -1
        self.last_test_point = -1
        # The counter is expected to return TOTAL_AGENT_TRAIN_SAMPLE_COUNT.
        assert callable(train_sample_count_func)
Пример #22
0
    def __init__(self,
                 train_sample_count_func,
                 config_or_config_dict: (DictConfig, dict),
                 func_dict: dict
                 ):
        """
        Constructor: load the environment from the config and allocate two transition buffers.

        :param train_sample_count_func: optional callable; it is only checked for callability here
        :type train_sample_count_func: callable or None
        :param config_or_config_dict: configuration handed to construct_dict_config; the resulting
            parameters are queried for 'env', 'random_size' and 'rl_size'
        :type config_or_config_dict: DictConfig or dict
        :param func_dict: function dict forwarded to the parent constructor
        :type func_dict: dict
        """
        super(MBMPC_TrainFlow, self).__init__(func_dict=func_dict)
        flow_config = construct_dict_config(config_or_config_dict, obj=self)
        # Hyper-parameter instance backed by the flow configuration.
        self.parameters = Parameters(source_config=flow_config, parameters={})
        if train_sample_count_func:
            # The counter is expected to return TOTAL_AGENT_TRAIN_SAMPLE_COUNT.
            assert callable(train_sample_count_func)

        from baconian.common.sampler.sample_data import MPC_TransitionData
        self.env = self.parameters('env')
        self.env_spec = self.env.env_spec
        env_spec = self.env_spec

        def _new_buffer(size_key):
            # The buffers differ only in the parameter key that supplies ``size``.
            return MPC_TransitionData(env_spec=env_spec,
                                      obs_shape=env_spec.obs_shape,
                                      action_shape=env_spec.action_shape,
                                      size=self.parameters(size_key))

        self.random_buffer = _new_buffer('random_size')
        self.rl_buffer = _new_buffer('rl_size')
Пример #23
0
    def test_scheduler_param(self):
        """
        Check scheduler-driven parameters and that copy_from() reproduces them.

        'param2' is attached to a LinearSchedule and 'param4' to a PiecewiseSchedule,
        both clocked by the module-level global ``x``. The test advances ``x`` by one
        20 times while asserting the scheduled values, then copies the parameters
        into a second instance and compares the two.
        """
        def func():
            # Time function for both schedulers: reports the module-level counter x.
            global x
            return x

        parameters = dict(param1='aaaa',
                          param2=1.0,
                          param4=1.0,
                          param3=np.random.random([4, 2]))
        source_config, _ = self.create_dict_config()
        a = Parameters(
            parameters=parameters,
            source_config=source_config,
            name='test_params',
            to_scheduler_param_tuple=(dict(param_key='param2',
                                           scheduler=LinearSchedule(
                                               t_fn=func,
                                               schedule_timesteps=10,
                                               final_p=0.0)),
                                      dict(param_key='param4',
                                           scheduler=PiecewiseSchedule(
                                               t_fn=func,
                                               endpoints=((2, 0.5), (8, 0.2),
                                                          (10, 0.0)),
                                               outside_value=0.0,
                                           ))))
        a.init()
        # NOTE(review): x must already exist at module level; the expected values
        # below presumably assume it starts at 0 -- confirm against the module.
        for i in range(20):
            global x
            if x < 10:
                # Expected linear decay from 1.0 towards 0.0 over 10 steps.
                self.assertEqual(a('param2'), 1.0 - x * (1.0 - 0.0) / 10)
            else:
                self.assertEqual(a('param2'), 0.0)
            # param4 is only checked at the listed endpoints and from x >= 10 on.
            if x == 2:
                self.assertEqual(a('param4'), 0.5)
            if x == 8:
                self.assertEqual(a('param4'), 0.2)
            if x >= 10:
                self.assertEqual(a('param4'), 0.0)
            x += 1
        # Copy everything from a into a freshly created Parameters instance.
        b, _ = self.create_parameters()
        b.copy_from(a)
        # Config-backed keys: arrays compared element-wise, everything else by identity.
        for key in a._source_config.required_key_dict.keys():
            if isinstance(a[key], np.ndarray):
                self.assertTrue(np.equal(a[key], b[key]).all())
            else:
                self.assertEqual(id(a[key]), id(b[key]))
                self.assertEqual(id(a(key)), id(b(key)))
        # Plain parameters: arrays compared element-wise, everything else by equality.
        for key in a._parameters.keys():
            if isinstance(a[key], np.ndarray):
                self.assertTrue(np.equal(a[key], b[key]).all())
            else:
                self.assertEqual(a[key], b[key])
                self.assertEqual(a(key), b(key))
        # The scheduler lists must have the same length, the same keys and the same current values.
        self.assertEqual(a.to_scheduler_param_list.__len__(),
                         b.to_scheduler_param_list.__len__())
        for a_val, b_val in zip(a.to_scheduler_param_list,
                                b.to_scheduler_param_list):
            self.assertEqual(a_val['param_key'], b_val['param_key'])
            self.assertEqual(a_val['scheduler'].value(),
                             b_val['scheduler'].value())