Example #1
    def __init__(self, args, env):
        self.args = args
        self.env = env
        self.evolver = utils_ne.SSNE(self.args)
        self.best_r = 0
        self.best_state = []

        #Init population
        self.pop = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.Actor(args))

        #Turn off gradients and put in eval mode
        for actor in self.pop:
            actor.eval()

        #Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        #Trackers
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = None
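A minimal sketch of constructing this agent, assuming the snippet's modules (ddpg, utils_ne, replay_memory) are importable; the enclosing class name Agent and every field value below are illustrative, not taken from the source:

from types import SimpleNamespace

# Hypothetical hyperparameters: only the fields this constructor actually reads.
args = SimpleNamespace(
    pop_size=10,         # number of actors in the evolutionary population
    buffer_size=100000,  # replay memory capacity
    action_dim=4,        # dimensionality of the environment's action space
)
# agent = Agent(args, env)  # 'Agent' stands in for the (unshown) enclosing class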
Example #2
    def __init__(self, args: Parameters, env):
        self.args = args
        self.env = env

        # Init population
        self.pop = []
        self.buffers = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.GeneticAgent(args))

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        if args.per:
            self.replay_buffer = replay_memory.PrioritizedReplayMemory(args.buffer_size, args.device,
                                                                       beta_frames=self.args.num_frames)
        else:
            self.replay_buffer = replay_memory.ReplayMemory(args.buffer_size, args.device)

        self.ounoise = ddpg.OUNoise(args.action_dim)
        self.evolver = utils_ne.SSNE(self.args, self.rl_agent.critic, self.evaluate)

        # Population novelty
        self.ns_r = 1.0
        self.ns_delta = 0.1
        self.best_train_reward = 0.0
        self.time_since_improv = 0
        self.step = 1

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.iterations = 0
        self.gen_frames = None
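The beta_frames argument passed to the prioritized buffer suggests the usual PER schedule: the importance-sampling exponent beta is annealed toward 1.0 over the course of training. A hedged sketch of that schedule; beta_start and the function name are illustrative, not this library's API:

def beta_by_frame(frame_idx, beta_start=0.4, beta_frames=1000000):
    # Linearly anneal the importance-sampling exponent from beta_start to 1.0
    # over beta_frames steps (Schaul et al., prioritized experience replay).
    return min(1.0, beta_start + frame_idx * (1.0 - beta_start) / beta_frames)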
Example #3
    def test_mutation(self):
        models = [800, 1400, 1600, 1800, 2200]
        source_dir = 'exp/cheetah_sm0.1_distil_save_20/models/'

        pr, nmr, smr = [], [], []   # parent / normal-mutation / safe-mutation rewards
        ps, nms, sms = [], [], []   # the corresponding visited-state records
        ssne = mod_neuro_evo.SSNE(self.args, None, None)
        for i, model in enumerate(models):
            print("========== Mutation for {} ==============".format(model))
            agent = self.load_genetic_agent(source_dir, model)
            p_reward, p_states = self.evaluate(agent)
            pr.append(p_reward)
            ps.append(p_states)

            nchild = ddpg.GeneticAgent(self.args)
            ssne.clone(agent, nchild)
            ssne.mutate_inplace(nchild)

            nm_reward, nm_states = self.evaluate(nchild)
            nmr.append(nm_reward)
            nms.append(nm_states)

            dchild = ddpg.GeneticAgent(self.args)
            ssne.clone(agent, dchild)
            ssne.proximal_mutate(dchild, 0.05)
            sm_reward, sm_states = self.evaluate(dchild)
            smr.append(sm_reward)
            sms.append(sm_states)

            print("Parent", pr[-1])
            print("Normal", nmr[-1])
            print("Safe", smr[-1])

        # Ablation for safe mutation
        ablation_mag = [0.0, 0.01, 0.05, 0.1, 0.2]
        agent = self.load_genetic_agent(source_dir, 2200)
        ablr = []
        abls = []
        for mag in ablation_mag:
            dchild = ddpg.GeneticAgent(self.args)
            ssne.clone(agent, dchild)
            ssne.proximal_mutate(dchild, mag)

            sm_reward, sm_states = self.evaluate(dchild)
            ablr.append(sm_reward)
            abls.append(sm_states)

        save_file = 'visualise/mutation'
        np.savez(save_file,
                 pr=pr,
                 nmr=nmr,
                 smr=smr,
                 ps=ps,
                 nms=nms,
                 sms=sms,
                 ablr=ablr,
                 abls=abls,
                 abl_mag=ablation_mag)
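A short sketch of reading the saved results back for plotting. np.savez appends the .npz extension to the path automatically; allow_pickle=True covers the case where the recorded state trajectories were stored as object arrays:

import numpy as np

data = np.load('visualise/mutation.npz', allow_pickle=True)
print(data['pr'], data['nmr'], data['smr'])  # parent / normal / safe-mutation rewards
print(data['abl_mag'], data['ablr'])         # mutation magnitude vs. resulting reward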
Example #4
    def __init__(self, args, env):
        self.args = args
        self.env = env
        self.evolver = utils_ne.SSNE(self.args)

        # Init population
        self.pop = []
        for _ in range(args.pop_size):
            self.pop.append(ddpg.Actor(args))

        # Turn off gradients and put in eval mode
        for actor in self.pop:
            actor.eval()

        # Init RL Agent
        self.rl_agent = ddpg.DDPG(args)
        self.ounoise = ddpg.OUNoise(args.action_dim)

        # Workers: the evolutionary population plus the RL actor. Built by
        # concatenation, since list.append mutates in place and returns None.
        self.workers = self.pop + [self.rl_agent.actor]

        # Trackers
        self.num_games = 0
        self.num_frames = 0
        self.gen_frames = 0
        self.len_replay = 0
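ddpg.OUNoise appears throughout these examples as the exploration noise for the RL actor. A minimal sketch of the Ornstein-Uhlenbeck process such a class presumably implements; this is an illustration, not the library's actual code:

import numpy as np

class OUNoiseSketch:
    # Temporally correlated noise that decays toward mu; commonly added to
    # DDPG actions during rollouts.
    def __init__(self, action_dim, mu=0.0, theta=0.15, sigma=0.2):
        self.mu, self.theta, self.sigma = mu, theta, sigma
        self.state = np.ones(action_dim) * mu

    def noise(self):
        dx = self.theta * (self.mu - self.state)
        dx += self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state

noise = OUNoiseSketch(action_dim=4)
print(noise.noise())  # one step of exploration noise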
Example #5
    def test_crossover(self):
        source_dir = 'exp/cheetah_sm0.1_distil_save_20/models/'
        models = [1400, 1600, 1800, 2200]

        parent1 = []     # parent 1 rewards
        parent2 = []     # parent 2 rewards
        normal_cro = []  # rewards after in-place crossover
        distil_cro = []  # rewards after distillation crossover
        p1s, p2s, ncs, dcs = [], [], [], []  # corresponding visited-state records
        for i, model1 in enumerate(models):
            for j, model2 in enumerate(models):
                if j > i:
                    print(
                        "========== Crossover between {} and {} =============="
                        .format(model1, model2))
                    critic = ddpg.Critic(self.args)
                    critic_path = os.path.join(
                        source_dir, 'evo_net_critic_{}.pkl'.format(model2))
                    critic.load_state_dict(torch.load(critic_path))

                    agent1 = self.load_genetic_agent(source_dir, model1)
                    agent2 = self.load_genetic_agent(source_dir, model2)

                    p1_reward, p1_states = self.evaluate(agent1)
                    p2_reward, p2_states = self.evaluate(agent2)
                    parent1.append(p1_reward)
                    parent2.append(p2_reward)
                    p1s.append(p1_states)
                    p2s.append(p2_states)

                    ssne = mod_neuro_evo.SSNE(self.args, critic, None)
                    child1 = ddpg.GeneticAgent(self.args)
                    child2 = ddpg.GeneticAgent(self.args)
                    ssne.clone(agent1, child1)
                    ssne.clone(agent2, child2)

                    ssne.crossover_inplace(child1, child2)

                    c1_reward, c1_states = self.evaluate(child1)
                    normal_cro.append(c1_reward)
                    ncs.append(c1_states)

                    child = ssne.distilation_crossover(agent1, agent2)
                    c_reward, c_states = self.evaluate(child)
                    distil_cro.append(c_reward)
                    dcs.append(c_states)

                    print("Parent 1", parent1[-1])
                    print("Parent 2", parent2[-1])
                    print("Normal crossover", normal_cro[-1])
                    print("Distillation crossover", distil_cro[-1])
                    print()

        save_file = 'visualise/crossover'
        np.savez(save_file,
                 p1=parent1,
                 p2=parent2,
                 nc=normal_cro,
                 dc=distil_cro,
                 p1s=p1s,
                 p2s=p2s,
                 ncs=ncs,
                 dcs=dcs)
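The nested loop with the j > i guard enumerates each unordered pair of checkpoints exactly once; itertools.combinations expresses the same pairing directly. A runnable equivalence check:

from itertools import combinations

models = [1400, 1600, 1800, 2200]
pairs = [(m1, m2) for i, m1 in enumerate(models)
         for j, m2 in enumerate(models) if j > i]
assert pairs == list(combinations(models, 2))  # identical pairs, same order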