# Imports assumed by all snippets below; Actor, Critic, Policy, Q,
# StateValueFunction, ReplayBuffer, Buffer, ModelIO and device are
# project-level definitions not shown here.
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path

def __init__(self,
              state_dim,
              action_dim,
              gamma=0.99,
              noise_std=0.02,
              hidden_dim=64,
              actor_lr=0.001,
              critic_lr=0.001,
              verbose=False):
     self.gamma = gamma
     self.tau = 0.001  # soft target-update rate (Polyak coefficient)
     self.actor = Actor(state_dim,
                        noise_std=noise_std,
                        hidden_dim=hidden_dim)
     self.actor_target = Actor(state_dim,
                               noise_std=noise_std,
                               hidden_dim=hidden_dim)
     self.critic = Critic(state_dim,
                          action_dim,
                          hidden_dim=hidden_dim)
     self.critic_target = Critic(state_dim,
                                 action_dim,
                                 hidden_dim=hidden_dim)
     # initialise the target networks as exact copies of the online networks
     self.actor_target.load_state_dict(self.actor.state_dict())
     self.critic_target.load_state_dict(self.critic.state_dict())
     self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                             lr=actor_lr)
     self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                              lr=critic_lr)
     self.buffer = ReplayBuffer(max_size=int(1e5))  # cast to int; 1e5 is a float
     self.logging_period = 10 if verbose else 100
     # --- ModelIO ---
     self.modelio = ModelIO(model_path=Path(__file__).resolve().parent /
                            'models')
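The `tau` stored above is the usual Polyak coefficient for soft target updates in DDPG-style agents. A minimal sketch of that update, assuming it runs after each learning step (the helper name `soft_update` is illustrative, not taken from the original):

def soft_update(target_net, source_net, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target
    for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
        t_param.data.copy_(tau * s_param.data + (1.0 - tau) * t_param.data)

# called after each optimisation step:
# soft_update(self.actor_target, self.actor, self.tau)
# soft_update(self.critic_target, self.critic, self.tau)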
Example #2
    def __init__(self,
                 state_dim,
                 action_dim,
                 gamma=0.99,
                 hidden_dim=64,
                 actor_lr=0.001,
                 critic_lr=0.001,
                 K_epochs=5,
                 eps_clip=0.2,
                 entropy_coeff=0.02,
                 d2c=None,
                 verbose=False):
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.K_epochs = K_epochs
        self.entropy_coeff = entropy_coeff
        self.d2c = d2c
        self.verbose = verbose

        self.critic = Critic(state_dim, hidden_dim=hidden_dim).to(device)
        self.actor = Actor(state_dim, action_dim,
                           hidden_dim=hidden_dim).to(device)
        self.actor_old = Actor(state_dim, action_dim,
                               hidden_dim=hidden_dim).to(device)
        self.actor_optimizer = torch.optim.Adam(self.actor.parameters(),
                                                lr=actor_lr)
        self.critic_optimizer = torch.optim.Adam(self.critic.parameters(),
                                                 lr=critic_lr)
        self.actor_old.load_state_dict(self.actor.state_dict())  # snapshot of the current policy for the importance ratio
        self.buffer = Buffer()
        # --- ModelIO ---
        self.modelio = ModelIO(model_path=Path(__file__).resolve().parent /
                               'models')
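Given `eps_clip`, `K_epochs`, and `entropy_coeff`, the actor update is presumably PPO's clipped surrogate. A sketch of the per-batch loss, assuming `log_probs`, `old_log_probs`, `advantages`, and `dist_entropy` tensors have already been computed (all four names are assumptions):

# probability ratio pi_theta(a|s) / pi_theta_old(a|s)
ratios = torch.exp(log_probs - old_log_probs)
surr1 = ratios * advantages
surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
# clipped surrogate objective (maximised), plus an entropy bonus
actor_loss = -torch.min(surr1, surr2).mean() - self.entropy_coeff * dist_entropy.mean()
# this loss is re-optimised for self.K_epochs passes over the same rollout,
# after which actor_old is refreshed from actor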
Example #3
 def __init__(self, state_dim, action_dim, gamma, d2c=None):
     self._V = StateValueFunction(state_dim)
     self._pi = Policy(state_dim, action_dim)
     self.d2c = d2c  # discrete to continuous actions
     # self._V.cuda()
     # self._pi.cuda()
     self._gamma = gamma
     self._loss_function = nn.MSELoss()
     self._V_optimizer = optim.Adam(self._V.parameters(), lr=0.001)
     self._pi_optimizer = optim.Adam(self._pi.parameters(), lr=0.0001)
     self._action_dim = action_dim
     # --- ModelIO ---
     self._modelio = ModelIO(model_path=Path(__file__).resolve().parent /
                             'models')
     self._baseline_model_name = 'ac_baseline.pt'
     self._policy_model_name = 'ac_policy.pt'
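Here `self._V` is a learned baseline, so the update is presumably a one-step actor-critic: the TD error serves as the advantage estimate. A rough sketch, where `reward`, `next_state`, `done`, `state`, and `log_prob` are assumed per-step tensors:

# one-step TD target; the state-value network provides the baseline
td_target = reward + self._gamma * self._V(next_state) * (1 - done)
advantage = (td_target - self._V(state)).detach()
v_loss = self._loss_function(self._V(state), td_target.detach())
pi_loss = -(log_prob * advantage).mean()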
Example #4
    def __init__(self, state_dim, action_dim, gamma, d2c=None):
        self._q = Q(state_dim, action_dim)
        self._q_target = Q(state_dim, action_dim)
        # start the target network as an exact copy of the online network
        self._q_target.load_state_dict(self._q.state_dict())

        # self._q.cuda()
        # self._q_target.cuda()

        self._gamma = gamma
        self._loss_function = nn.MSELoss()
        self._q_optimizer = optim.Adam(self._q.parameters(), lr=0.0001)
        self._action_dim = action_dim
        self._replay_buffer = ReplayBuffer(5000)
        self._d2c = d2c
        # --- ModelIO ---
        self._modelio = ModelIO(model_path=Path(__file__).resolve().parent /
                                'models')
        self._q_model_name = 'q.pt'
        self._target_model_name = 'target.pt'
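With an online network, a frozen copy, and a replay buffer, this is the standard DQN recipe. A sketch of the TD target on a sampled minibatch, assuming `states`, `actions`, `rewards`, `next_states`, and `dones` tensors come from `self._replay_buffer` (the batch names are assumptions):

with torch.no_grad():
    # bootstrap from the target network to stabilise the regression target
    max_next_q = self._q_target(next_states).max(dim=1).values
    targets = rewards + self._gamma * max_next_q * (1 - dones)
q_values = self._q(states).gather(1, actions.long().unsqueeze(1)).squeeze(1)
loss = self._loss_function(q_values, targets)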
Example #5

 def __init__(self,
              state_dim,
              action_dim,
              gamma=0.99,
              hidden_dim=64,
              policy_lr=0.001,
              baseline_lr=0.001):
     self._V = StateValueFunction(state_dim, hidden_dim=hidden_dim)
     self._pi = Policy(state_dim, action_dim, hidden_dim=hidden_dim)
     # self._V.cuda()
     # self._pi.cuda()
     self._gamma = gamma
     self._loss_function = nn.MSELoss()
     self._V_optimizer = optim.Adam(self._V.parameters(), lr=baseline_lr)
     self._pi_optimizer = optim.Adam(self._pi.parameters(), lr=policy_lr)
     self._action_dim = action_dim
     # --- ModelIO ---
     self._modelio = ModelIO(model_path=Path(__file__).resolve().parent /
                             'models')
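This final variant keeps a policy and a state-value baseline but no replay buffer or target networks, which matches REINFORCE with a learned baseline. A sketch of the end-of-episode update, assuming per-step lists `rewards` and `log_probs` plus a stacked `states` tensor (illustrative names):

# discounted returns, accumulated backwards over the episode
returns, G = [], 0.0
for r in reversed(rewards):
    G = r + self._gamma * G
    returns.insert(0, G)
returns = torch.tensor(returns, dtype=torch.float32)

values = self._V(states).squeeze(-1)
advantages = (returns - values).detach()  # subtracting the baseline lowers variance
pi_loss = -(torch.stack(log_probs) * advantages).mean()
v_loss = self._loss_function(values, returns)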