Example #1
 def __init__(self, state_dim, action_dim, config):
     super().__init__(state_dim, action_dim, config)
     self.network = DDPGBulletNetworkSURND(state_dim, action_dim, config)
     self.memory = ExperienceReplayBuffer(config.memory_size)
     self.motivation = MetaCriticRNDMotivation(self.network.metacritic_model, config.motivation_lr, config.motivation_variant, config.motivation_eta, self.memory, config.motivation_batch_size,
                                               config.device)
     self.algorithm = DDPG(self.network, config.actor_lr, config.critic_lr, config.gamma, config.tau, self.memory, config.batch_size, self.motivation)
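Several of the examples on this page wire an RND-style motivation module (MetaCriticRNDMotivation, QRNDMotivation) into DDPG. For orientation, below is a generic Random Network Distillation sketch; it is not the implementation behind these classes, every name in it is illustrative, and the intrinsic reward is simply the predictor's error against a frozen, randomly initialised target network.

 import torch
 import torch.nn as nn

 class RNDSketch(nn.Module):
     # Generic RND: a frozen random target network and a trained predictor;
     # the per-state prediction error serves as the intrinsic reward.
     def __init__(self, state_dim, feature_dim=64):
         super().__init__()
         self.target = nn.Sequential(nn.Linear(state_dim, 128), nn.ReLU(),
                                     nn.Linear(128, feature_dim))
         self.predictor = nn.Sequential(nn.Linear(state_dim, 128), nn.ReLU(),
                                        nn.Linear(128, feature_dim))
         for p in self.target.parameters():
             p.requires_grad = False  # target stays fixed

     def intrinsic_reward(self, state):
         # per-sample prediction error, typically scaled by an eta coefficient
         with torch.no_grad():
             target_features = self.target(state)
         return (self.predictor(state) - target_features).pow(2).mean(dim=-1)

     def loss(self, state):
         # minimised on replayed states to train the predictor
         return self.intrinsic_reward(state).mean()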
Example #2
 def __init__(self, gate, critic, gate_lr, critic_lr, gamma, tau,
              memory_buffer, sample_size, forward_model, meta_critic):
     self._gate = gate
     self._ddpg = DDPG(gate, critic, gate_lr, critic_lr, gamma, tau,
                       memory_buffer, sample_size)
     self._forward_model = forward_model
     self._meta_critic = meta_critic
Example #3
 def __init__(self, state_dim, action_dim, config):
     super().__init__(state_dim, action_dim, config)
     self.network = DDPGAerisNetworkFIM(state_dim, action_dim, config)
     self.memory = ExperienceReplayBuffer(config.memory_size)
     self.motivation = ForwardInverseModelMotivation(self.network.forward_model, config.forward_model_lr, self.network.inverse_model, config.forward_model_lr,
                                                     0.5, config.forward_model_eta,
                                                     config.forward_model_variant, self.memory, config.forward_model_batch_size, config.device)
     self.algorithm = DDPG(self.network, config.actor_lr, config.critic_lr, config.gamma, config.tau, self.memory, config.batch_size, self.motivation)
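Every constructor above passes config.tau to DDPG; in standard DDPG this is the coefficient of the soft (Polyak) update applied to the target actor and critic after each training step. A minimal sketch of that update, independent of this repository's DDPG class:

 import torch

 def soft_update(target_net, source_net, tau):
     # Polyak averaging: theta_target <- tau * theta_source + (1 - tau) * theta_target
     with torch.no_grad():
         for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
             t_param.mul_(1.0 - tau).add_(tau * s_param)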
Example #4
    def __init__(self, network, lr, gamma, tau, eta, memory, sample_size):
        self.network = network
        self._optimizer = torch.optim.Adam(self.network.forward_model.parameters(), lr=lr)
        self.eta = eta
        self._memory = memory
        self._sample_size = sample_size
        self._weight = torch.tensor([[0.9, 0.1]], dtype=torch.float32)

        self.gate_algorithm = DDPG(self.network.gate, lr, lr, gamma, tau, memory, 32)
Example #5
 def __init__(self, state_dim, action_dim, config):
     super().__init__(state_dim, action_dim, config)
     self.network = DDPGBulletNetwork(state_dim, action_dim, config)
     self.memory = ExperienceReplayBuffer(config.memory_size)
     self.algorithm = DDPG(self.network, config.actor_lr, config.critic_lr, config.gamma, config.tau, self.memory, config.batch_size)
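Example #5 is the plain agent: DDPG with an experience replay buffer and no motivation module. A gym-style interaction loop for such an agent would typically look like the sketch below; the get_action, memory.add and train methods are assumed names for illustration and are not taken from the source.

 def run_episode(env, agent, max_steps=1000):
     # one training episode for a DDPG-style agent (method names are assumptions)
     state = env.reset()
     episode_reward = 0.0
     for _ in range(max_steps):
         action = agent.get_action(state)              # actor output plus exploration noise
         next_state, reward, done, _ = env.step(action)
         agent.memory.add(state, action, reward, next_state, done)
         agent.train()                                 # one gradient step on a replayed batch
         episode_reward += reward
         state = next_state
         if done:
             break
     return episode_reward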
Example #6
 def __init__(self, state_dim, action_dim, config):
     super().__init__(state_dim, action_dim, config)
     self.network = DDPGAerisNetworkQRND(state_dim, action_dim, config)
     self.memory = ExperienceReplayBuffer(config.memory_size)
     self.motivation = QRNDMotivation(self.network.qrnd_model, config.forward_model_lr, config.forward_model_eta, self.memory)
     self.algorithm = DDPG(self.network, config.actor_lr, config.critic_lr, config.gamma, config.tau, self.memory, config.batch_size, self.motivation)
Example #7
 def __init__(self, state_dim, action_dim, config):
     super().__init__(state_dim, action_dim, config)
     self.network = DDPGBulletNetworkDOP(state_dim, action_dim, config)
     self.memory = DOPReplayBuffer(config.memory_size)
     self.motivation = DOPMotivation(self.network.dop_model, config.motivation_lr, config.motivation_eta, self.memory, config.motivation_batch_size, config.device)
     self.algorithm = DDPG(self.network, config.actor_lr, config.critic_lr, config.gamma, config.tau, self.memory, config.batch_size)
Example #8
                                                int(time.time()))).make()
    else:
        environment = gym.make(args.env)
        is_env_pool = False

    tf.reset_default_graph()
    with tf.device(device):
        if args.eval:
            replay_memory = None
        else:
            replay_memory = Replay_Memory(memory_size=args.memory_size)
        if args.model == 'DDPG':
            agent = DDPG(environment,
                         args.hidden_dims,
                         replay_memory=replay_memory,
                         gamma=args.gamma,
                         actor_lr=args.actor_lr,
                         critic_lr=args.critic_lr,
                         tau=args.tau,
                         N=args.N)
        elif args.model == 'QRDDPG':
            agent = QRDDPG(environment,
                           args.hidden_dims,
                           replay_memory=replay_memory,
                           gamma=args.gamma,
                           actor_lr=args.actor_lr,
                           critic_lr=args.critic_lr,
                           tau=args.tau,
                           N=args.N,
                           kappa=args.kappa,
                           n_quantile=args.n_quantile)
        elif args.model == 'D3PG':
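Example #8 pulls its hyperparameters from an args namespace. A minimal argparse sketch that could supply those fields is shown below; the flag names and defaults are assumptions, not taken from the original script.

 import argparse

 parser = argparse.ArgumentParser(description='DDPG / QRDDPG training (illustrative sketch)')
 parser.add_argument('--env', type=str, default='Pendulum-v0')
 parser.add_argument('--model', type=str, default='DDPG', choices=['DDPG', 'QRDDPG', 'D3PG'])
 parser.add_argument('--hidden_dims', type=int, nargs='+', default=[256, 256])
 parser.add_argument('--memory_size', type=int, default=1000000)
 parser.add_argument('--gamma', type=float, default=0.99)
 parser.add_argument('--actor_lr', type=float, default=1e-4)
 parser.add_argument('--critic_lr', type=float, default=1e-3)
 parser.add_argument('--tau', type=float, default=0.005)
 parser.add_argument('--N', type=int, default=1)            # meaning depends on the original script
 parser.add_argument('--kappa', type=float, default=1.0)    # Huber threshold for the quantile loss (QRDDPG)
 parser.add_argument('--n_quantile', type=int, default=32)  # number of quantiles (QRDDPG)
 parser.add_argument('--eval', action='store_true')
 args = parser.parse_args()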
Example #9
import numpy as np
import time

MAX_EPISODES = 900
MAX_EP_STEPS = 200
ON_TRAIN = False

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method
pd = PD()
rl = DDPG(a_dim, s_dim, a_bound)

steps = []


def runpd():
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):

            a = pd.cal(s, np.array([0, 0, -4, 0, 0]))
            s, r, done, safe = env.step(a)
            if done or j == MAX_EP_STEPS - 1 or safe is False:
                print('Ep: %i | %s | %s | ep_r: %.1f | step: %i' %
                      (i, '---' if not done else 'done',