def __init__(
        self,
        name,
        env: (Env, Wrapper),
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        reset_noise_every_terminal_state=False,
        reset_state_every_sample=False,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventScheduler = None):
    """
    Set up the agent: register it with its base class and store its collaborators.

    :param name: the name of the agent instance
    :type name: str
    :param env: environment that interacts with the agent
    :type env: Env
    :param algo: algorithm of the agent
    :type algo: Algo
    :param env_spec: environment specifications: action space and environment space
    :type env_spec: EnvSpec
    :param sampler: sampler; when None a default ``Sampler()`` is created
    :type sampler: Sampler
    :param noise_adder: add action noise for exploration in action space
    :type noise_adder: AgentActionNoiseWrapper
    :param reset_noise_every_terminal_state: reset the noise every sampled trajectory;
        only recorded in ``self.parameters`` here
    :type reset_noise_every_terminal_state: bool
    :param reset_state_every_sample: reset the state every time a sample/rollout is performed;
        only recorded in ``self.parameters`` here
    :type reset_state_every_sample: bool
    :param exploration_strategy: exploration strategy in action space; any falsy value is
        stored as None
    :type exploration_strategy: ExplorationStrategy
    :param algo_saving_scheduler: control the schedule of the varying parameters in the
        training process
    :type algo_saving_scheduler: EventScheduler
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))

    # The two reset flags are not kept as plain attributes; they live in the
    # Parameters container under their own names.
    reset_flags = dict(
        reset_noise_every_terminal_state=reset_noise_every_terminal_state,
        reset_state_every_sample=reset_state_every_sample)
    self.parameters = Parameters(parameters=reset_flags)

    self.env = env
    self.algo = algo
    self._env_step_count = 0
    self.sampler = Sampler() if sampler is None else sampler
    self.recorder = Recorder(default_obj=self)
    self.env_spec = env_spec

    # NOTE: the attribute is spelled ``explorations_strategy`` (with an "s").
    if not exploration_strategy:
        self.explorations_strategy = None
    else:
        assert isinstance(exploration_strategy, ExplorationStrategy)
        self.explorations_strategy = exploration_strategy

    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def __init__(
        self,
        name,
        env: Env,
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventScheduler = None):
    """
    Set up the agent: register it with its base class, zero its counters and
    store its collaborators.

    :param name: the name of the agent instance
    :type name: str
    :param env: environment that interacts with the agent
    :type env: Env
    :param algo: algorithm of the agent
    :type algo: Algo
    :param env_spec: environment specifications: action space and environment space
    :type env_spec: EnvSpec
    :param sampler: sampler; when None a ``Sampler`` named ``'<name>_sampler'`` is
        created from ``env_spec``
    :type sampler: Sampler
    :param noise_adder: add action noise for exploration in action space
    :type noise_adder: AgentActionNoiseWrapper
    :param exploration_strategy: exploration strategy in action space; any falsy value is
        stored as None
    :type exploration_strategy: ExplorationStrategy
    :param algo_saving_scheduler: control the schedule of the varying parameters in the
        training process
    :type algo_saving_scheduler: EventScheduler
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
    self.parameters = Parameters(parameters=dict())
    self.total_test_samples = 0
    self.total_train_samples = 0
    self.env = env
    self.algo = algo
    self._env_step_count = 0
    self.recorder = Recorder()
    self.env_spec = env_spec

    # NOTE: the attribute is spelled ``explorations_strategy`` (with an "s").
    if exploration_strategy:
        assert isinstance(exploration_strategy, ExplorationStrategy)
        self.explorations_strategy = exploration_strategy
    else:
        self.explorations_strategy = None

    # Assign the sampler exactly once. Compare against None rather than relying
    # on truthiness so that a caller-supplied sampler which happens to evaluate
    # falsy (e.g. one defining __len__) is not silently replaced by a default.
    if sampler is None:
        sampler = Sampler(env_spec=env_spec, name='{}_sampler'.format(name))
    self.sampler = sampler

    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def __init__(
        self,
        name,
        env: Env,
        algo: Algo,
        env_spec: EnvSpec,
        sampler: Sampler = None,
        noise_adder: AgentActionNoiseWrapper = None,
        exploration_strategy: ExplorationStrategy = None,
        algo_saving_scheduler: EventSchedule = None):
    """
    Set up the agent: register it with its base class, zero its counters and
    store its collaborators.

    :param name: name of the agent; passed to the base class and used to derive the
        default sampler name ``'<name>_sampler'``
    :param env: environment object, stored as ``self.env``
    :param algo: algorithm object, stored as ``self.algo``
    :param env_spec: environment specification, stored as ``self.env_spec`` and handed
        to the default sampler
    :param sampler: sampler to use; when None a ``Sampler`` is created from ``env_spec``
    :param noise_adder: stored as ``self.noise_adder`` without further processing
    :param exploration_strategy: must be an ``ExplorationStrategy`` when truthy; any falsy
        value is stored as None
    :param algo_saving_scheduler: stored as ``self.algo_saving_scheduler`` without
        further processing
    """
    super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
    self.parameters = Parameters(parameters=dict())
    self.total_test_samples = 0
    self.total_train_samples = 0
    self.env = env
    self.algo = algo
    self._env_step_count = 0
    self.recorder = Recorder()
    self.env_spec = env_spec

    # NOTE: the attribute is spelled ``explorations_strategy`` (with an "s").
    if exploration_strategy:
        assert isinstance(exploration_strategy, ExplorationStrategy)
        self.explorations_strategy = exploration_strategy
    else:
        self.explorations_strategy = None

    # Assign the sampler exactly once. Compare against None rather than relying
    # on truthiness so that a caller-supplied sampler which happens to evaluate
    # falsy (e.g. one defining __len__) is not silently replaced by a default.
    if sampler is None:
        sampler = Sampler(env_spec=env_spec, name='{}_sampler'.format(name))
    self.sampler = sampler

    self.noise_adder = noise_adder
    self.algo_saving_scheduler = algo_saving_scheduler
def create_sampler(self, env_spec):
    """
    Build a new ``Sampler`` for the given environment specification.

    :param env_spec: environment specification forwarded to the ``Sampler`` constructor
    :return: a freshly constructed ``Sampler`` whose name is ``'sampler'``
    """
    new_sampler = Sampler(name='sampler', env_spec=env_spec)
    return new_sampler