def __init__(self, args, env):
        self.args = args
        self.env = env
        self.evolver = utils_ne.SSNE(self.args)
        self.best_r = 0
        self.best_state = []

        # Init population
        self.pop = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.Actor(args))

        # Turn off gradients and put in eval mode
        for actor in self.pop:
            actor.eval()

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = None
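A minimal usage sketch for the constructor above. The field names pop_size, buffer_size, and action_dim are read directly by this __init__; the surrounding class name (Agent here), the state_dim field, and the gym environment are assumptions added for illustration only.

    import gym
    from types import SimpleNamespace

    env = gym.make("Pendulum-v1")                     # assumed environment
    args = SimpleNamespace(
        pop_size=10,                                  # size of the evolutionary population
        buffer_size=1_000_000,                        # replay memory capacity
        action_dim=env.action_space.shape[0],         # consumed by OUNoise (and the actors)
        state_dim=env.observation_space.shape[0],     # assumption: ddpg.Actor likely needs it
    )
    agent = Agent(args, env)                          # Agent: the (unshown) class owning this __init__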
Example #2
    def __init__(self, args: Parameters, env):
        self.args = args
        self.env = env

        # Init population
        self.pop = []
        self.buffers = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.GeneticAgent(args))

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        if args.per:
            self.replay_buffer = replay_memory.PrioritizedReplayMemory(args.buffer_size, args.device,
                                                                       beta_frames=self.args.num_frames)
        else:
            self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)

        self.ounoise = ddpg.OUNoise(args.action_dim)
        self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

        # Population novelty
        self.ns_r = 1.0
        self.ns_delta = 0.1
        self.best_train_reward = 0.0
        self.time_since_improv = 0
        self.step = 1

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.iterations = 0
        self.gen_frames = None
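A similar sketch for this variant, showing how args.per selects the replay memory. The fields per, device, buffer_size, num_frames, pop_size, and action_dim are all read by the constructor above; the Parameters class itself is not shown, so a stand-in namespace and an assumed environment are used for illustration.

    import gym
    from types import SimpleNamespace

    env = gym.make("Pendulum-v1")                     # assumed environment
    args = SimpleNamespace(
        pop_size=10,
        buffer_size=100_000,
        action_dim=env.action_space.shape[0],
        device="cpu",                                 # forwarded to the replay memory
        per=True,                                     # True -> PrioritizedReplayMemory, False -> ReplayMemory
        num_frames=1_000_000,                         # also used as beta_frames for PER annealing
    )
    agent = Agent(args, env)                          # Agent: the (unshown) class owning this __init__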
Example #3
    def __init__(self, args, env):
        self.args = args
        self.evolver = utils_ne.SSNE(self.args)

        # Init population of actors (gradients off, eval mode)
        self.pop = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.Actor(args))
        for actor in self.pop:
            actor.eval()

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)

        self.ounoise = ddpg.OUNoise(args.action_dim)
        # Workers: the evolutionary population plus the RL agent's actor.
        # Note: list.append() returns None, so assign the list itself rather than the return value.
        self.pop.append(self.rl_agent.actor)
        self.workers = self.pop

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = 0
        self.len_replay = 0
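The workers assignment fixed above (self.workers = self.pop) deserves a short worked note: list.append mutates the list in place and returns None, so assigning its return value, as the unedited snippet did, would leave workers set to None.

    pop = ["actor_0", "actor_1"]          # stand-ins for ddpg.Actor instances
    result = pop.append("rl_actor")       # append mutates pop and returns None
    assert result is None
    workers = pop                         # correct: workers aliases the full population list
    assert workers == ["actor_0", "actor_1", "rl_actor"]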