Example #1
    def __init__(self, args, env):
        self.args = args
        self.env = env
        self.evolver = utils_ne.SSNE(self.args)
        self.best_r = 0
        self.best_state = []

        #Init population
        self.pop = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.Actor(args))

        #Turn off gradients and put in eval mode
        for actor in self.pop:
            actor.eval()

        #Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        #Trackers
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = None
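
All of these constructors consume a single `args` configuration object. As a point of reference, here is a minimal sketch of the fields Example #1 reads, built with `argparse.Namespace`; the field names come from the snippet, but the values are purely illustrative:

from argparse import Namespace

# Hypothetical configuration for Example #1; values are illustrative only.
args = Namespace(
    pop_size=10,          # number of actors in the evolutionary population
    buffer_size=1000000,  # replay buffer capacity
    action_dim=4,         # dimensionality of the environment's action space
)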
Example #2
    def __init__(self, args: Parameters, env):
        self.args = args
        self.env = env

        # Init population
        self.pop = []
        self.buffers = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.GeneticAgent(args))

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        if args.per:
            self.replay_buffer = replay_memory.PrioritizedReplayMemory(args.buffer_size, args.device,
                                                                       beta_frames=self.args.num_frames)
        else:
            self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)

        self.ounoise = ddpg.OUNoise(args.action_dim)
        self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

        # Population novelty
        self.ns_r = 1.0
        self.ns_delta = 0.1
        self.best_train_reward = 0.0
        self.time_since_improv = 0
        self.step = 1

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.iterations = 0
        self.gen_frames = None
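
Example #2 forwards `beta_frames` into the prioritized buffer. In common implementations of prioritized experience replay (Schaul et al., 2016), this parameter anneals the importance-sampling exponent beta toward 1.0 over training. A minimal sketch of that schedule, assuming a conventional `beta_start` of 0.4 (both names are assumptions, not taken from the snippet):

def beta_by_frame(frame, beta_start=0.4, beta_frames=100000):
    # Linearly anneal the importance-sampling exponent from beta_start
    # to 1.0 over beta_frames steps, then clamp it at 1.0.
    return min(1.0, beta_start + frame * (1.0 - beta_start) / beta_frames)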
Example #3
    def __init__(self, args: Parameters):

        self.args = args

        # Actor network and its optimizer
        self.actor = Actor(args)
        self.actor_optim = Adam(self.actor.parameters(), lr=1e-3)

        # Per-individual replay buffer and regression loss
        self.buffer = replay_memory.ReplayMemory(self.args.individual_bs,
                                                 args.device)
        self.loss = nn.MSELoss()
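
Example #3 pairs a per-individual buffer with an MSE loss and an actor optimizer, the usual setup for a supervised (imitation-style) actor update. A hedged sketch of what such an update step could look like; `update_from_buffer` and the assumed `(state, target_action)` sample format are hypothetical, not taken from the repository:

def update_from_buffer(agent, batch_size=64):
    # Hypothetical step: regress the actor's output toward stored target
    # actions. Assumes sample() yields (state, target_action) tensor pairs.
    states, target_actions = agent.buffer.sample(batch_size)
    loss = agent.loss(agent.actor(states), target_actions)
    agent.actor_optim.zero_grad()
    loss.backward()
    agent.actor_optim.step()
    return loss.item()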
Example #4
    def __init__(self, args, env):
        self.args = args
        self.env = env

        #Create the Agent
        self.agent = magent.MetaAgent(args.state_dim, args.action_dim,
                                      args.num_subs, args.master_lr,
                                      args.sub_lr)

        #Init Replay Buffer
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
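
Every example instantiates `replay_memory.ReplayMemory`, but the class itself is not shown here. A minimal deque-backed sketch of the interface such buffers commonly expose (an assumption about this codebase, not its actual implementation):

import random
from collections import deque

class ReplayMemory:
    # Sketch: bounded FIFO transition store with uniform random sampling.
    def __init__(self, capacity):
        self.memory = deque(maxlen=capacity)

    def push(self, transition):
        self.memory.append(transition)

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)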
Example #5
    def __init__(self, args):

        self.args = args
        self.buffer = replay_memory.ReplayMemory(args.individual_bs,
                                                 args.device)

        # Actor network, its target copy, and optimizer
        self.actor = Actor(args, init=True)
        self.actor_target = Actor(args, init=True)
        self.actor_optim = Adam(self.actor.parameters(), lr=0.5e-4)

        # Critic network, its target copy, and optimizer
        self.critic = Critic(args)
        self.critic_target = Critic(args)
        self.critic_optim = Adam(self.critic.parameters(), lr=0.5e-3)

        self.gamma = args.gamma
        self.tau = self.args.tau
        self.loss = nn.MSELoss()

        # Initialize the targets with the same weights as their source networks
        hard_update(self.actor_target, self.actor)
        hard_update(self.critic_target, self.critic)
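
Example #5 calls `hard_update` to synchronize the target networks at construction time. DDPG codebases typically define it next to a Polyak-averaging `soft_update` used during training; a sketch of both, assuming standard PyTorch `nn.Module` networks:

def hard_update(target, source):
    # Copy every parameter of source into target verbatim.
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(s.data)

def soft_update(target, source, tau):
    # Polyak averaging: target <- tau * source + (1 - tau) * target.
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(tau * s.data + (1.0 - tau) * t.data)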