Example #1
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                # Create a new config by merging the dicts.
                # run_config is not a tunable hyperparameter so it does not need to be
                # merged.
                run_config = base_config.pop("run_config", None)
                self._merged_config = merge_dicts(base_config, self.config)
                self._merged_config["run_config"] = run_config
Example #2
            @classmethod
            def default_resource_request(
                cls, config: PartialTrainerConfigDict
            ) -> Union[Resources, PlacementGroupFactory]:
                resolved_config = merge_dicts(base_config, config)
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(process_datasets=False)

                return rllib_trainer.default_resource_request(rllib_config)
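This classmethod variant resolves the merged config, instantiates a throwaway trainer from it, and then delegates to the underlying RLlib trainer class's default_resource_request, so Tune can size a trial's placement group before training starts. Note that base_config, param_dict, trainer_cls, and rllib_trainer are closed-over variables from an enclosing factory function that the excerpt does not show.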
Example #3
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                # Create a new config by merging the dicts.
                # run_config is not a tunable hyperparameter so it does not need to be
                # merged.
                run_config = base_config.pop("run_config", None)
                self._merged_config = merge_dicts(base_config, self.config)
                self._merged_config["run_config"] = run_config
                merged_scaling_config = self._merged_config.get("scaling_config")
                if isinstance(merged_scaling_config, dict):
                    merged_scaling_config = ScalingConfig(**merged_scaling_config)
                self._merged_config[
                    "scaling_config"
                ] = self._reconcile_scaling_config_with_trial_resources(
                    merged_scaling_config
                )
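The dict-to-dataclass normalization in the middle of Example #3 is easy to reproduce in isolation. A minimal sketch, assuming Ray AIR's ScalingConfig (ray.air.config.ScalingConfig) and illustrative field values:

    from ray.air.config import ScalingConfig

    scaling_config = {"num_workers": 2, "use_gpu": False}
    # Tune may hand the trainable a plain dict, so normalize it into the
    # dataclass form that the trial-resource reconciliation expects.
    if isinstance(scaling_config, dict):
        scaling_config = ScalingConfig(**scaling_config)
    assert scaling_config.num_workers == 2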
Example #4
            def __init__(
                self,
                config: Optional[PartialAlgorithmConfigDict] = None,
                env: Optional[Union[str, EnvType]] = None,
                logger_creator: Optional[Callable[[], Logger]] = None,
                remote_checkpoint_dir: Optional[str] = None,
                custom_syncer: Optional[Syncer] = None,
            ):
                resolved_config = merge_dicts(base_config, config or {})
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(process_datasets=True)

                super(AIRRLTrainer, self).__init__(
                    config=rllib_config,
                    env=env,
                    logger_creator=logger_creator,
                    remote_checkpoint_dir=remote_checkpoint_dir,
                    custom_syncer=custom_syncer,
                )
Example #5
            def __init__(
                self,
                config: Optional[PartialTrainerConfigDict] = None,
                env: Optional[Union[str, EnvType]] = None,
                logger_creator: Optional[Callable[[], Logger]] = None,
                remote_checkpoint_dir: Optional[str] = None,
                sync_function_tpl: Optional[str] = None,
            ):
                # Guard against config=None, which merge_dicts cannot handle.
                resolved_config = merge_dicts(base_config, config or {})
                param_dict["config"] = resolved_config

                trainer = trainer_cls(**param_dict)
                rllib_config = trainer._get_rllib_config(process_datasets=True)

                super(AIRRLTrainer, self).__init__(
                    rllib_config,
                    env,
                    logger_creator,
                    remote_checkpoint_dir,
                    sync_function_tpl,
                )
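Examples #4 and #5 appear to be two generations of the same AIRRLTrainer.__init__ wrapper: #4 is the newer signature (PartialAlgorithmConfigDict and a Syncer-based custom_syncer, passed to super() as keyword arguments), while #5 matches an older Ray API (PartialTrainerConfigDict and a sync_function_tpl template string, passed positionally).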
Example #6
    def __init__(self, algo_class=None):
        """Initializes a ApexConfig instance."""
        super().__init__(algo_class=algo_class or ApexDQN)

        # fmt: off
        # __sphinx_doc_begin__
        # APEX-DQN settings overriding DQN ones:
        # .training()
        self.optimizer = merge_dicts(
            DQNConfig().optimizer,
            {
                "max_weight_sync_delay": 400,
                "num_replay_buffer_shards": 4,
                "debug": False,
            },
        )
        self.n_step = 3
        self.train_batch_size = 512
        self.target_network_update_freq = 500000
        self.training_intensity = 1

        # Max number of in-flight requests to each sampling worker
        # (see the AsyncRequestsManager class for more details).
        # Tuning these values is important when running experiments with large
        # sample batches: if the sample batches are large, the object store may
        # fill up, causing the store to spill objects to disk. This can make
        # any asynchronous requests, and therefore your experiment, very slow.
        # You can inspect the object store during your experiment via a call
        # to `ray memory` on your head node, and by using the Ray dashboard.
        # If the object store is filling up, turn down the number of remote
        # requests in flight, or enable compression of timesteps in your
        # experiment.
        self.max_requests_in_flight_per_sampler_worker = 2
        self.max_requests_in_flight_per_replay_worker = float("inf")
        self.timeout_s_sampler_manager = 0.0
        self.timeout_s_replay_manager = 0.0
        # APEX-DQN uses a distributed (non-local) replay buffer.
        self.replay_buffer_config = {
            "no_local_replay_buffer": True,
            # Specify prioritized replay by supplying a buffer type that
            # supports prioritization.
            "type": "MultiAgentPrioritizedReplayBuffer",
            "capacity": 2000000,
            # Alpha parameter for prioritized replay buffer.
            "prioritized_replay_alpha": 0.6,
            # Beta parameter for sampling from prioritized replay buffer.
            "prioritized_replay_beta": 0.4,
            # Epsilon to add to the TD errors when updating priorities.
            "prioritized_replay_eps": 1e-6,
            "learning_starts": 50000,
            # Whether all shards of the replay buffer must be co-located
            # with the learner process (running the execution plan).
            # This is preferred b/c the learner process should have quick
            # access to the data from the buffer shards, avoiding network
            # traffic each time samples from the buffer(s) are drawn.
            # Set this to False for relaxing this constraint and allowing
            # replay shards to be created on node(s) other than the one
            # on which the learner is located.
            "replay_buffer_shards_colocated_with_driver": True,
            "worker_side_prioritization": True,
            # Deprecated key.
            "prioritized_replay": DEPRECATED_VALUE,
        }

        # .rollouts()
        self.num_workers = 32
        self.rollout_fragment_length = 50
        self.exploration_config = {
            "type": "PerWorkerEpsilonGreedy",
        }

        # .resources()
        self.num_gpus = 1

        # .reporting()
        self.min_time_s_per_iteration = 30
        self.min_sample_timesteps_per_iteration = 25000
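The defaults above are meant to be overridden through the config object's fluent setters (.training(), .rollouts(), .resources(), .reporting()) rather than by editing the constructor. A minimal usage sketch; the class name and import path (ray.rllib.algorithms.apex_dqn.ApexDQNConfig, the Ray 2.x naming) are assumptions, since the docstring above calls the class ApexConfig:

    from ray.rllib.algorithms.apex_dqn import ApexDQNConfig

    config = ApexDQNConfig().training(n_step=1, train_batch_size=256)
    # Defaults that were not overridden survive the merge in __init__:
    assert config.optimizer["num_replay_buffer_shards"] == 4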
Example #7
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)

                # Create a new config by merging the dicts.
                self._merged_config = merge_dicts(base_config, self.config)
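Example #7 is the minimal form of the wrapper in Examples #1 and #3: it merges the base and per-trial configs but does no run_config or scaling_config post-processing afterwards.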