Пример #1
0
    def __init__(
            self,
            name,
            env: (Env, Wrapper),
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            reset_noise_every_terminal_state=False,
            reset_state_every_sample=False,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Initialise the agent: record its reset flags and store its collaborators.

        :param name: the name of the agent instance
        :type name: str
        :param env: environment that interacts with the agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and environment space
        :type env_spec: EnvSpec
        :param sampler: sampler; a default ``Sampler()`` is created when None
        :type sampler: Sampler
        :param noise_adder: add action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param reset_noise_every_terminal_state: reset the noise every sampled trajectory
        :type reset_noise_every_terminal_state: bool
        :param reset_state_every_sample: reset the state every time a sample/rollout is performed
        :type reset_state_every_sample: bool
        :param exploration_strategy: exploration strategy in action space; any falsy
            value is stored as None
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler stored on the agent (presumably decides
            when the algorithm is saved -- confirm against its usage)
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))

        # The two reset switches are the only entries registered as parameters.
        reset_flags = {
            'reset_noise_every_terminal_state': reset_noise_every_terminal_state,
            'reset_state_every_sample': reset_state_every_sample,
        }
        self.parameters = Parameters(parameters=reset_flags)

        self.env, self.algo = env, algo
        self._env_step_count = 0
        # Fall back to a default-constructed sampler when the caller gave none.
        self.sampler = Sampler() if sampler is None else sampler
        self.recorder = Recorder(default_obj=self)
        self.env_spec = env_spec

        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
        else:
            # Normalise every falsy value to None.
            exploration_strategy = None
        # NOTE: the attribute is spelled ``explorations_strategy`` (extra "s");
        # the spelling is kept because it is part of the object's attribute surface.
        self.explorations_strategy = exploration_strategy

        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #2
0
    def __init__(
            self,
            name,
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventScheduler = None):
        """
        Initialise the agent: zero its sample counters and store its collaborators.

        :param name: the name of the agent instance; also used to name the default
            sampler (``'<name>_sampler'``)
        :type name: str
        :param env: environment that interacts with the agent
        :type env: Env
        :param algo: algorithm of the agent
        :type algo: Algo
        :param env_spec: environment specifications: action space and environment space
        :type env_spec: EnvSpec
        :param sampler: sampler; when None a ``Sampler`` is built from ``env_spec``
        :type sampler: Sampler
        :param noise_adder: add action noise for exploration in action space
        :type noise_adder: AgentActionNoiseWrapper
        :param exploration_strategy: exploration strategy in action space; any falsy
            value is stored as None
        :type exploration_strategy: ExplorationStrategy
        :param algo_saving_scheduler: scheduler stored on the agent (presumably decides
            when the algorithm is saved -- confirm against its usage)
        :type algo_saving_scheduler: EventScheduler
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            # NOTE: attribute is spelled ``explorations_strategy`` (extra "s");
            # kept because it is part of the object's attribute surface.
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # Assign the sampler exactly once. Test against None explicitly so that a
        # caller-supplied sampler is never discarded merely for evaluating falsy.
        if sampler is not None:
            self.sampler = sampler
        else:
            self.sampler = Sampler(
                env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #3
0
    def __init__(
            self,
            name,
            env: Env,
            algo: Algo,
            env_spec: EnvSpec,
            sampler: Sampler = None,
            noise_adder: AgentActionNoiseWrapper = None,
            exploration_strategy: ExplorationStrategy = None,
            algo_saving_scheduler: EventSchedule = None):
        """
        Initialise the agent: zero its sample counters and store its collaborators.

        :param name: name forwarded to the base-class initialiser; also used to
            name the default sampler (``'<name>_sampler'``)
        :param env: stored as ``self.env``
        :param algo: stored as ``self.algo``
        :param env_spec: stored as ``self.env_spec`` and passed to the default
            ``Sampler`` when no sampler is supplied
        :param sampler: sampler to use; when None a ``Sampler`` is built from
            ``env_spec``
        :param noise_adder: stored as ``self.noise_adder`` without further checks
        :param exploration_strategy: must be an ``ExplorationStrategy`` instance
            when truthy; any falsy value is stored as None
        :param algo_saving_scheduler: stored as ``self.algo_saving_scheduler``
            without further checks
        """
        super(Agent, self).__init__(name=name, status=StatusWithSubInfo(self))
        self.parameters = Parameters(parameters=dict())
        self.total_test_samples = 0
        self.total_train_samples = 0
        self.env = env
        self.algo = algo
        self._env_step_count = 0
        self.recorder = Recorder()
        self.env_spec = env_spec
        if exploration_strategy:
            assert isinstance(exploration_strategy, ExplorationStrategy)
            # NOTE: attribute is spelled ``explorations_strategy`` (extra "s");
            # kept because it is part of the object's attribute surface.
            self.explorations_strategy = exploration_strategy
        else:
            self.explorations_strategy = None
        # Assign the sampler exactly once. Test against None explicitly so that a
        # caller-supplied sampler is never discarded merely for evaluating falsy.
        if sampler is not None:
            self.sampler = sampler
        else:
            self.sampler = Sampler(
                env_spec=env_spec, name='{}_sampler'.format(name))
        self.noise_adder = noise_adder
        self.algo_saving_scheduler = algo_saving_scheduler
Пример #4
0
 def create_sampler(self, env_spec):
     """
     Build a fresh ``Sampler`` named ``'sampler'`` for the given spec.

     :param env_spec: forwarded unchanged to the ``Sampler`` constructor
     :return: the newly constructed ``Sampler``
     """
     new_sampler = Sampler(name='sampler', env_spec=env_spec)
     return new_sampler