def __init__(self, state_dim, action_dim, gamma=0.99, noise_std=0.02, hidden_dim=64,
             actor_lr=0.001, critic_lr=0.001, verbose=False):
    self.gamma = gamma
    # self.tau = 0.01
    self.tau = 0.001  # soft-update rate for the target networks

    self.actor = Actor(state_dim, noise_std=noise_std, hidden_dim=hidden_dim)
    self.actor_target = Actor(state_dim, noise_std=noise_std, hidden_dim=hidden_dim)
    self.critic = Critic(state_dim, action_dim, hidden_dim=hidden_dim)
    self.critic_target = Critic(state_dim, action_dim, hidden_dim=hidden_dim)

    # Start the target networks as exact copies of the online networks.
    self.actor_target.load_state_dict(self.actor.state_dict())
    self.critic_target.load_state_dict(self.critic.state_dict())

    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=actor_lr)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=critic_lr)

    # int(1e5): the literal 1e5 is a float, which breaks buffers that expect an integer capacity.
    self.buffer = ReplayBuffer(max_size=int(1e5))
    self.logging_period = 10 if verbose else 100

    # --- ModelIO ---
    self.modelio = ModelIO(model_path=Path(__file__).resolve().parent / 'models')
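# Hedged sketch, not in the original source: the `tau` hyperparameter above is
# typically consumed by a Polyak soft update of the target networks. The method
# name `_soft_update` is an assumption for illustration.
def _soft_update(self, target, source):
    # theta_target <- tau * theta_online + (1 - tau) * theta_target
    for target_param, param in zip(target.parameters(), source.parameters()):
        target_param.data.copy_(self.tau * param.data + (1.0 - self.tau) * target_param.data)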
def __init__(self, state_dim, action_dim, gamma=0.99, hidden_dim=64, actor_lr=0.001,
             critic_lr=0.001, K_epochs=5, eps_clip=0.2, entropy_coeff=0.02,
             d2c=None, verbose=False):
    self.gamma = gamma
    self.eps_clip = eps_clip            # PPO clipping range epsilon
    self.K_epochs = K_epochs            # optimization epochs per rollout
    self.entropy_coeff = entropy_coeff  # weight of the entropy bonus
    self.d2c = d2c                      # discrete-to-continuous action mapping
    self.verbose = verbose

    self.critic = Critic(state_dim, hidden_dim=hidden_dim).to(device)
    self.actor = Actor(state_dim, action_dim, hidden_dim=hidden_dim).to(device)
    self.actor_old = Actor(state_dim, action_dim, hidden_dim=hidden_dim).to(device)

    self.actor_optimizer = torch.optim.Adam(self.actor.parameters(), lr=actor_lr)
    self.critic_optimizer = torch.optim.Adam(self.critic.parameters(), lr=critic_lr)

    # The "old" policy starts as an exact copy of the current one.
    self.actor_old.load_state_dict(self.actor.state_dict())

    self.buffer = Buffer()

    # --- ModelIO ---
    self.modelio = ModelIO(model_path=Path(__file__).resolve().parent / 'models')
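# Hedged sketch, not part of the original source: the clipped surrogate
# objective that `eps_clip` and `entropy_coeff` above imply. The method name
# `_clipped_loss`, its arguments, and the sign conventions are assumptions.
def _clipped_loss(self, logprobs, old_logprobs, advantages, entropy):
    ratios = torch.exp(logprobs - old_logprobs.detach())  # pi_theta / pi_theta_old
    surr1 = ratios * advantages
    surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
    # Maximize the clipped surrogate plus an entropy bonus (so minimize the negative).
    return -torch.min(surr1, surr2).mean() - self.entropy_coeff * entropy.mean()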
def __init__(self, state_dim, action_dim, gamma, d2c=None):
    self._V = StateValueFunction(state_dim)
    self._pi = Policy(state_dim, action_dim)
    self.d2c = d2c  # discrete to continuous actions
    # self._V.cuda()
    # self._pi.cuda()
    self._gamma = gamma
    self._loss_function = nn.MSELoss()
    self._V_optimizer = optim.Adam(self._V.parameters(), lr=0.001)
    self._pi_optimizer = optim.Adam(self._pi.parameters(), lr=0.0001)
    self._action_dim = action_dim

    # --- ModelIO ---
    self._modelio = ModelIO(model_path=Path(__file__).resolve().parent / 'models')
    self._baseline_model_name = 'ac_baseline.pt'
    self._policy_model_name = 'ac_policy.pt'
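# Hedged sketch, not in the original source: the one-step TD target that
# `_gamma`, `_V`, and `_loss_function` above are typically combined into.
# The method name `_td_target` is an assumption for illustration.
def _td_target(self, reward, next_state, done):
    # `done` is assumed to be a 0/1 float flag; terminal states bootstrap to zero.
    with torch.no_grad():
        return reward + self._gamma * self._V(next_state) * (1.0 - done)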
def __init__(self, state_dim, action_dim, gamma, d2c=None):
    self._q = Q(state_dim, action_dim)
    self._q_target = Q(state_dim, action_dim)
    # Start the target network as an exact copy of the online network.
    self._q_target.load_state_dict(self._q.state_dict())
    # self._q.cuda()
    # self._q_target.cuda()
    self._gamma = gamma
    self._loss_function = nn.MSELoss()
    self._q_optimizer = optim.Adam(self._q.parameters(), lr=0.0001)
    self._action_dim = action_dim
    self._replay_buffer = ReplayBuffer(5000)
    self._d2c = d2c  # discrete to continuous actions

    # --- ModelIO ---
    self._modelio = ModelIO(model_path=Path(__file__).resolve().parent / 'models')
    self._q_model_name = 'q.pt'
    self._target_model_name = 'target.pt'
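# Hedged sketch, not in the original source: DQN periodically refreshes the
# target network by copying the online weights into it. `_update_target` is
# an assumed name for that step.
def _update_target(self):
    self._q_target.load_state_dict(self._q.state_dict())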
def __init__(self, state_dim, action_dim, gamma=0.99, hidden_dim=64,
             policy_lr=0.001, baseline_lr=0.001):
    self._V = StateValueFunction(state_dim, hidden_dim=hidden_dim)
    self._pi = Policy(state_dim, action_dim, hidden_dim=hidden_dim)
    # self._V.cuda()
    # self._pi.cuda()
    self._gamma = gamma
    self._loss_function = nn.MSELoss()
    self._V_optimizer = optim.Adam(self._V.parameters(), lr=baseline_lr)
    self._pi_optimizer = optim.Adam(self._pi.parameters(), lr=policy_lr)
    self._action_dim = action_dim

    # --- ModelIO ---
    self._modelio = ModelIO(model_path=Path(__file__).resolve().parent / 'models')
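# Hedged sketch, not part of the original source: the discounted Monte-Carlo
# returns that a REINFORCE-with-baseline update needs from `_gamma` above.
# `_discounted_returns` is an assumed helper name.
def _discounted_returns(self, rewards):
    # G_t = r_t + gamma * G_{t+1}, computed by a backward pass over the episode.
    returns, g = [], 0.0
    for r in reversed(rewards):
        g = r + self._gamma * g
        returns.insert(0, g)
    return torch.tensor(returns, dtype=torch.float32)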