import logging

from ray.rllib.agents.trainer_template import build_trainer
from ray.rllib.evaluation.postprocessing import Postprocessing
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.policy.torch_policy import EntropyCoeffSchedule, LearningRateSchedule
from ray.rllib.policy.torch_policy_template import build_torch_policy
from ray.rllib.utils import try_import_torch
from ray.rllib.utils.torch_ops import sequence_mask

from algorithms.data_augmentation.data_augmentation import apply_data_augmentation
from algorithms.data_augmenting_ppo_agent.ppo_utils import (compute_running_mean_and_variance,
                                                            RunningStat,
                                                            ExpWeightedMovingAverageStat)
from algorithms.data_augmenting_ppo_agent.ucb_learner import UCBLearner
from algorithms.data_augmenting_ppo_agent.sync_phasic_optimizer import SyncPhasicOptimizer

torch, nn = try_import_torch()

logger = logging.getLogger(__name__)


def compute_ppo_loss(policy, dist_class, model, train_batch, action_dist, state):
    # For recurrent policies, build a sequence mask so padded timesteps are
    # excluded from the loss.
    mask = None
    if state:
        max_seq_len = torch.max(train_batch["seq_lens"])
        mask = sequence_mask(train_batch["seq_lens"], max_seq_len)
        mask = torch.reshape(mask, [-1])

    # Build the standard PPO clipped-surrogate loss object.
    policy.loss_obj = PPOLoss(
        dist_class,
        model,
        train_batch[Postprocessing.VALUE_TARGETS],
        train_batch[Postprocessing.ADVANTAGES],
import logging
import os
from typing import Dict

import ray
from ray import tune
from ray.rllib import BaseEnv
from ray.rllib.agents.callbacks import DefaultCallbacks
from ray.rllib.agents.sac.sac import SACTrainer
from ray.rllib.evaluation import MultiAgentEpisode, RolloutWorker
from ray.rllib.policy import Policy
from ray.rllib.utils import try_import_torch
from ray.rllib.utils.typing import PolicyID

from grl.utils.strategy_spec import StrategySpec
from grl.rllib_tools.space_saving_logger import get_trainer_logger_creator
from grl.utils.common import find_free_port
from grl.utils.common import data_dir
from grl.envs.oshi_zumo_multi_agent_env import ThousandActionOshiZumoMultiAgentEnv
from grl.rllib_tools.policy_checkpoints import load_pure_strat
from grl.rl_apps.scenarios.catalog import scenario_catalog
from grl.rl_apps.scenarios.nfsp_scenario import NFSPScenario

torch, _ = try_import_torch()

logger = logging.getLogger(__name__)

if __name__ == "__main__":
    tmp_br_env = ThousandActionOshiZumoMultiAgentEnv(
        env_config={
            'version': "oshi_zumo",