def __init__(self,
             D="DefaultDir",
             H1=64,
             H2=64,
             P=100,
             G=5000,
             S=50000,
             E="TimePilot-ram-v0",
             wd=0.01,  # weight decay, initialized to 0.01
             si=0.5):  # initial sigma for CMA-ES
    # HYPERPARAMETERS
    self.HL1 = H1
    self.HL2 = H2
    self.NPOP = P
    self.MAX_ITER = G
    self.STEPS = S
    self.dir = D

    # CONSTANTS
    self.STATE_SIZE = 128
    self.ACTION_SIZE = self.decisions_env(E)
    self.env = gym.make(E)
    self.env.reset()

    # CMA-ES: parameter count of the two-hidden-layer policy (weights only, no biases)
    NPARAMS = (self.STATE_SIZE * self.HL1) + (self.HL1 * self.HL2) + (self.HL2 * self.ACTION_SIZE)
    cma = CMAES(NPARAMS,
                popsize=self.NPOP,
                weight_decay=wd,
                sigma_init=si)
    self.FINAL = self.Engine(cma)
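NPARAMS above counts only the three weight matrices, with no bias terms. A minimal sketch of how a flat CMA-ES candidate of that length could be reshaped into the policy's weights; the tanh/argmax choices and the action_size=6 default are assumptions for illustration, not taken from the source:

import numpy as np

def unpack_policy(flat, state_size=128, h1=64, h2=64, action_size=6):
    """Split a flat parameter vector into the three bias-free weight
    matrices counted by NPARAMS. action_size=6 is only a placeholder."""
    i = state_size * h1
    j = i + h1 * h2
    W1 = flat[:i].reshape(state_size, h1)
    W2 = flat[i:j].reshape(h1, h2)
    W3 = flat[j:].reshape(h2, action_size)
    return W1, W2, W3

def act(flat, obs, action_size=6):
    """Hypothetical forward pass: the activation choice is an assumption."""
    W1, W2, W3 = unpack_policy(flat, action_size=action_size)
    h = np.tanh(obs @ W1)
    h = np.tanh(h @ W2)
    return int(np.argmax(h @ W3))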
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
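PRECISION and the packet sizes govern how candidate solutions are serialized into fixed-size int32 buffers for the workers, which explains the (5 + num_params) arithmetic. A sketch of such an encoding, modeled on estool's train.py; the exact five header fields here are an assumption, not taken from this snippet:

import numpy as np

PRECISION = 10000  # fixed-point scale: four decimal digits survive the int32 round-trip

def encode_solution(worker_id, trial_id, seed, train_mode, solution):
    """Pack one (5 + num_params)-int32 record: a 5-field header plus the
    parameter vector scaled by PRECISION. Header layout is hypothetical."""
    header = np.array([worker_id, trial_id, seed, train_mode, len(solution)])
    params = np.round(np.array(solution) * PRECISION)
    return np.concatenate([header, params]).astype(np.int32)

def decode_solution(packet):
    """Invert encode_solution: split off the header, rescale the params."""
    header, params = packet[:5], packet[5:]
    return header, params.astype(np.float64) / PRECISION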
def problem4():
    # each trial uses a data partition of 100000 rows
    for trials in range(2):
        start = 100000 * trials
        end = 100000 * (trials + 1)
        pdis = PDIS(behavior_file="data.csv",
                    start_index=start,
                    end_index=end)
        pdis.calculate_pi_b()
        # for trials in range(10):
        NPARAMS = 4        # a 4-dimensional problem
        NPOPULATION = 75   # population size of 75
        MAX_ITERATION = 6
        cmaes = CMAES(NPARAMS,
                      popsize=NPOPULATION,
                      weight_decay=0.0,
                      sigma_init=0.5)
        history = []
        print("candidate_data: ", start, end,
              "Population: ", NPOPULATION,
              "Max Iteration: ", MAX_ITERATION)
        for j in range(MAX_ITERATION):
            solutions = cmaes.ask()
            fitness_list = np.zeros(cmaes.popsize)
            for i in range(cmaes.popsize):
                fitness_list[i] = pdis.upper_bound(solutions[i])
            cmaes.tell(fitness_list)
            # result(): first element is the best solution, second is the best fitness
            result = cmaes.result()
            history.append(result[1])
            if (j + 1) % 2 == 0:
                print("fitness at iteration", (j + 1), result[1])
        print("local optimum discovered by solver:\n", result[0])
        print("fitness score at this local optimum:", result[1])
        # return history
        pdis.execute_safety_test(result[0], trials)
def main():
    commands = sys.argv
    if ("solve" in commands):
        solver = CMAES(num_params=N_NEURONS,
                       sigma_init=0.50,    # initial standard deviation
                       popsize=1000,       # population size
                       weight_decay=0.01)  # weight decay coefficient
        # solver = SimpleGA(N_NEURONS)
        test_solver(solver)
    elif ("test" in commands):
        init = np.loadtxt(FILE_TEST)
        evaluate(init, True, False)
def initialize_settings(c, r, sigma_init=0.1, sigma_decay=0.9999, init_opt=""):
    global es, model, comm, rank
    comm, rank = c, r
    model = make_model(sys.argv[1])
    num_params = model.param_count
    if len(init_opt) > 0:
        es = pickle.load(open(init_opt, "rb"))
    else:
        if config.OPTIMIZER == "cma":
            cma = CMAES(num_params,
                        sigma_init=sigma_init,
                        popsize=config.POPULATION)
            es = cma
    global PRECISION
    PRECISION = 10000
    global SOLUTION_PACKET_SIZE
    SOLUTION_PACKET_SIZE = (5 + num_params) * config.NUM_WORKER_TRIAL
    global RESULT_PACKET_SIZE
    RESULT_PACKET_SIZE = 4 * config.NUM_WORKER_TRIAL
    return es, model, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
class GA:
    def __init__(self, timelimit, pop_size, device):
        self.pop_size = pop_size
        self.truncation_threshold = int(pop_size / 2)  # pop_size should be divisible by two
        self.P = []

        # unique GA id
        self.init_time = datetime.now().strftime("%Y%m%d_%H%M%S")

        # load configuration params
        with open('config/creature.json') as f:
            config = json.load(f)
        model_fromdisk = config.get('vae.model.fromdisk')
        model_path = config.get('vae.model.path')
        latent_size = config.get('vae.latent.size')
        obs_size = config.get('vae.obs.size')
        num_effectors = config.get('joints.size') + config.get('brushes.size')
        input_size = latent_size + num_effectors
        output_size = num_effectors
        cpg_enabled = config.get('cpg.enabled')
        if cpg_enabled:
            input_size += 1
            output_size += 1

        # load vision module
        from models.vae import VAE
        vae = VAE(latent_size).cuda()
        if model_fromdisk:
            vae.load_state_dict(torch.load(model_path))
            vae.eval()  # inference mode
            print(f'Loaded VAE model {model_path} from disk')

        print(f'Generating initial population of {pop_size} candidates...')

        # initialize population
        from train import GAIndividual
        for _ in range(pop_size):
            self.P.append(
                GAIndividual(self.init_time,
                             input_size,
                             output_size,
                             obs_size,
                             compressor=vae,
                             cpg_enabled=cpg_enabled,
                             device=device,
                             time_limit=timelimit))

        # report controller parameters
        self.num_controller_params = input_size * output_size + output_size
        print(f'Number of controller parameters: {self.num_controller_params}')

    def run(self, max_generations, folder, ga_id='', init_solution_id=''):
        if (ga_id == ''):
            ga_id = self.init_time

        # disk
        results_dir = os.path.join(folder, ga_id)
        if not os.path.exists(results_dir):
            os.makedirs(results_dir)
        fitness_path = os.path.join(results_dir, 'fitness.txt')  # most important fitness results per run (for plotting)
        ind_fitness_path = os.path.join(results_dir, 'ind_fitness.txt')  # more detailed fitness results per individual
        solver_path = os.path.join(results_dir, "solver.pkl")  # contains the current population
        best_solver_path = os.path.join(results_dir, "best_solver.pkl")  # contains the best solver so far
        init_solution_path = os.path.join(os.path.join(folder, init_solution_id), "solver.pkl")  # path to initial solution solver

        current_generation = 0
        P = self.P
        best_f = -sys.maxsize

        # initialize controller instance to be saved
        from models.controller import Controller
        best_controller = Controller(P[0].input_size, P[0].output_size)

        # initialize CMA-ES (start from scratch or load previously saved solver/population)
        resume = False
        if os.path.exists(solver_path):
            resume = True
            self.solver = pickle.load(open(solver_path, 'rb'))
            new_results = self.solver.result()
            best_f = new_results[1]
            if os.path.exists(fitness_path):
                with open(fitness_path, 'r') as f:
                    lines = f.read().splitlines()
                    last_line = lines[-1]
                    current_generation = int(last_line.split('/')[0])
        # start from scratch but with an initial solution param
        elif os.path.exists(init_solution_path):
            tmp_solver = pickle.load(open(init_solution_path, 'rb'))
            self.solver = CMAES(num_params=self.num_controller_params,
                                solution_init=tmp_solver.best_param(),
                                sigma_init=0.1,
                                popsize=self.pop_size)
        # completely start from scratch
        else:
            self.solver = CMAES(num_params=self.num_controller_params,
                                sigma_init=0.1,
                                popsize=self.pop_size)

        if not resume:
            with open(fitness_path, 'a') as file:
                file.write('gen/avg/cur/best\n')
            with open(ind_fitness_path, 'a') as file:
                file.write('gen/id/fitness/coverage/coverageReward/IC/PC/PCt0/PCt1\n')

        while current_generation < max_generations:
            fitness = np.zeros(self.pop_size)
            results_full = np.zeros(self.pop_size)
            print(f'Generation {current_generation}')
            print(f'Evaluating individuals: {len(P)}')

            # ask the ES to give us a set of candidate solutions
            solutions = self.solver.ask()

            # evaluate all candidates
            for i, s in enumerate(P):
                set_controller_weights(s.controller, solutions[i])
                s.run_solution(generation=current_generation, local_id=i)

            # request fitness from simulator
            results_full = Client(ClientType.REQUEST).start()
            fitness = results_full[:, 0]
            for i, s in enumerate(P):
                s.fitness = fitness[i]

            current_f = np.max(fitness)
            average_f = np.mean(fitness)
            print(f'Current best: {current_f}\nCurrent average: {average_f}\nAll-time best: {best_f}')

            # return rewards to ES for param update
            self.solver.tell(fitness)
            max_index = np.argmax(fitness)
            new_results = self.solver.result()

            # process results
            pickle.dump(self.solver, open(solver_path, 'wb'))
            if current_f > best_f:
                set_controller_weights(best_controller, solutions[max_index])
                torch.save(best_controller, os.path.join(results_dir, 'best_controller.pth'))
                # save solver and change level to a random one
                pickle.dump(self.solver, open(best_solver_path, 'wb'))
                best_f = current_f

            for i, s in enumerate(P):
                # fitness/coverage/coverageReward/IC/PC/PCt0/PCt1
                res = results_full[i, :]
                res_str = ('/'.join(['%.6f'] * len(res))) % tuple(res)
                with open(ind_fitness_path, 'a') as file:
                    file.write('%d/%d/%s\n' % (current_generation, i, res_str))

            res_str = '%d/%f/%f/%f' % (current_generation, average_f, current_f, best_f)
            print(f'gen/avg/cur/best : {res_str}')
            with open(fitness_path, 'a') as file:
                file.write(f'{res_str}\n')
            if (i > max_generations):
                break
            gc.collect()
            current_generation += 1

        print('Finished')
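Typical usage of the class above; the argument values and the 'results' folder are placeholders for illustration, not taken from the source:

# hypothetical driver; timelimit/pop_size/device and the folder are placeholders
ga = GA(timelimit=10, pop_size=64, device='cuda')
ga.run(max_generations=500, folder='results')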
NPOPULATION = 80      # population size of 80
MAX_ITERATION = 5000  # run the solver for up to 5000 generations

import numpy as np

fitness_option = 0
original_class = corresponding_index_labels[iterations]
filename = 'log_vgg19/imageName_' + random_filenames[iterations]
filename += '_fitness_' + str(fitness_option)
filename += '_target=' + str(target_class)
filename += '_original=' + str(original_class)
filename += '_npop=' + str(NPOPULATION)
filename += '_iters=' + str(MAX_ITERATION)
filename += 'range_limit'
filename += '_x0=' + str(xpoint)
filename += '_y0=' + str(ypoint)
filename += '_x1=' + str(xpoint2)
filename += '_y1=' + str(ypoint2)
log_file = open(filename, "w")

# defines the CMA-ES solver for each target class
cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              sigma_init=0.5)

# solve an NPARAMS-dimensional problem, sampling NPOPULATION candidates per iteration
iters, scratch, probabilities_all, orig_probs, target_probs = genetic_solver(
    xpoint, xpoint2, ypoint, ypoint2, cmaes, input_to_model,
    original_class, target_class, fitness_option, log_file,
    random_filenames[iterations])
print('Iterations taken: ', iters)
log_file.write('Iterations taken: ' + str(iters))
oes = OpenES(NPARAMS,                  # number of model parameters
             sigma_init=0.5,           # initial standard deviation
             sigma_decay=0.999,        # anneal standard deviation slowly
             learning_rate=0.1,        # learning rate for the mean update
             learning_rate_decay=1.0,  # don't anneal the learning rate
             popsize=NPOPULATION,      # population size
             antithetic=False,         # whether to use antithetic sampling
             weight_decay=0.00,        # weight decay coefficient
             rank_fitness=False,       # use rank rather than raw fitness numbers
             forget_best=False)        # keep the historical best solution

# defines the CMA-ES algorithm solver
cmaes = CMAES(NPARAMS,
              popsize=NPOPULATION,
              weight_decay=0.0,
              sigma_init=0.5)

print(mp.cpu_count())
pool = mp.Pool(mp.cpu_count())

fit_func = evluate_func

# defines a function that uses a solver to optimize fit_func
def test_solver(solver):
    history = []
    j = 0
    seed_width = 20
    while True:
        solutions = solver.ask()
        fitness_list = np.zeros(solver.popsize)
        # print(solutions)
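The snippet above is cut off inside test_solver's loop. A self-contained sketch of the same ask/evaluate/tell pattern, assuming estool's solver interface (ask, tell, result); the fixed iteration cap replaces whatever stopping criterion the original while loop used, which is an assumption:

import numpy as np

def test_solver_sketch(solver, fit_func, max_iteration=100):
    """Hypothetical completion of test_solver: drive an ask/tell loop
    and record the best fitness seen at each generation."""
    history = []
    for j in range(max_iteration):
        solutions = solver.ask()                  # sample a population of candidates
        fitness_list = np.zeros(solver.popsize)
        for i in range(solver.popsize):
            fitness_list[i] = fit_func(solutions[i])
        solver.tell(fitness_list)                 # report fitness back to the solver
        result = solver.result()                  # (best_params, best_fitness, ...)
        history.append(result[1])
    return history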
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
    global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = './log/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller_filebase = './controller/' + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    model = make_model()
    num_params = model.param_count
    # print("size of model", num_params)
    if len(init_opt) > 0:
        es = pickle.load(open(init_opt, 'rb'))
    else:
        if optimizer == 'ses':
            ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                       sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                       weight_decay=0.005, popsize=population)
            es = ses
        elif optimizer == 'ga':
            ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                          sigma_limit=0.02, elite_ratio=0.1,
                          weight_decay=0.005, popsize=population)
            es = ga
        elif optimizer == 'cma':
            cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
            es = cma
        elif optimizer == 'pepg':
            pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                        sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                        learning_rate_decay=1.0, learning_rate_limit=0.01,
                        weight_decay=0.005, popsize=population)
            es = pepg
        else:
            oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                         sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                         learning_rate_limit=0.01, antithetic=antithetic,
                         weight_decay=0.005, popsize=population)
            es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (4 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    if not os.path.exists(filedir):
        os.makedirs(filedir)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    controller = make_controller(args=config_args)
    num_params = controller.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
# this is why it is required to split the code into a main script (this) and a function script (functions.py)
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # starting parallel workers
    multiprocessing.set_start_method('spawn', True)
    workers = Pool(functions.num_workers)

    # training phase (TensorFlow graph activation, performing CMA-ES optimisation)
    with functions.sess.as_default():
        functions.sess.run(tf.compat.v1.global_variables_initializer())
        saver = tf.compat.v1.train.Saver()
        cma = CMAES(functions.NPARAMS,
                    sigma_init=functions.sigma,
                    weight_decay=0,
                    popsize=functions.NPOPULATION)
        cma_history = functions.solve(cma, workers)

    # loading learned gait after training
    with open(functions.save_dir, 'rb') as f:
        bestparams = pickle.load(f)
    with open(functions.max_fit_dir, 'rb') as f:
        history = pickle.load(f)

    # testing learned gait
    functions.env[0].reset()
    time.sleep(2)
    fitness = functions.fitness_func(bestparams, True)

    # showing reward/time graph
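functions.solve is defined elsewhere; a minimal sketch of what a pool-parallel CMA-ES loop of that shape could look like, assuming estool's ask/tell interface and a module-level fitness function. The function and parameter names below are assumptions, not the repository's actual functions.py:

import numpy as np

def solve_sketch(solver, workers, fitness_func, max_iteration=100):
    """Hypothetical stand-in for functions.solve: each generation,
    the worker Pool evaluates the whole population in parallel."""
    history = []
    for gen in range(max_iteration):
        solutions = solver.ask()
        # Pool.map farms one candidate out to each worker
        fitness = workers.map(fitness_func, solutions)
        solver.tell(np.array(fitness))
        history.append(solver.result()[1])  # best fitness so far
    return history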
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode
    population = num_worker * num_worker_trial
    os.makedirs(os.path.join(ROOT, 'log'), exist_ok=True)
    filebase = os.path.join(ROOT, 'log', gamename + '.' + optimizer + '.' + model_name + '.' + str(num_episode) + '.' + str(population)) + '.' + unique_id
    if novelty_search:
        filebase = filebase + '.novelty'
    # behavior characterization (BC) size depends on the novelty mode
    if novelty_mode == 'h':
        BC_SIZE = H_SIZE
    elif novelty_mode == 'z':
        BC_SIZE = Z_SIZE
    elif novelty_mode == 'h_concat':
        BC_SIZE = BC_SEQ_LENGTH * H_SIZE
        # NOVELTY_THRESHOLD = 180
    elif novelty_mode == 'z_concat':
        BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
    elif novelty_mode == 'a_concat':
        BC_SIZE = BC_SEQ_LENGTH * A_SIZE
    else:
        BC_SIZE = 9  # dummy BC size, not used because the reward is the distance travelled
    if novelty_mode:
        filebase = filebase + '.' + novelty_mode
    if ns_mode:
        filebase = filebase + '.' + ns_mode
    model = make_model(model_name, load_model=True)
    num_params = model.param_count
    print("size of model", num_params)
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=0.005, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=0.005, popsize=population)
        es = pepg
    else:
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=0.005, popsize=population)
        es = oes
def testRun():
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    pepg = PEPG(NPARAMS,                  # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False)        # keep the historical best solution
    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(NPARAMS,          # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False,        # keep the historical best solution
                diversity_best=0.1)       # use the diversity term (for testing)
    print("-----test PEPG variant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")
    pepgV2 = PEPGVariant(NPARAMS,         # number of model parameters
                sigma_init=0.5,           # initial standard deviation
                learning_rate=0.1,        # learning rate for the mean update
                learning_rate_decay=1.0,  # don't anneal the learning rate
                popsize=NPOPULATION,      # population size
                average_baseline=False,   # set baseline to average of batch
                weight_decay=0.00,        # weight decay coefficient
                rank_fitness=False,       # use rank rather than raw fitness numbers
                forget_best=False,        # keep the historical best solution
                diversity_best=1)         # stronger diversity term (for testing)
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(NPARAMS,                 # number of model parameters
                 sigma_init=0.5,          # initial standard deviation
                 sigma_decay=0.999,       # anneal standard deviation slowly
                 learning_rate=0.1,       # learning rate for the mean update
                 learning_rate_decay=1.0, # don't anneal the learning rate
                 popsize=NPOPULATION,     # population size
                 antithetic=False,        # whether to use antithetic sampling
                 weight_decay=0.00,       # weight decay coefficient
                 rank_fitness=False,      # use rank rather than raw fitness numbers
                 forget_best=False)
    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    best_history = [0] * MAX_ITERATION
    plt.figure(figsize=(16, 8), dpi=150)
    optimum_line, = plt.plot(best_history, color="black", linewidth=0.5,
                             linestyle="-.", label='Global Optimum')
    pepgv_line, = plt.plot(pepgv_history, color="red", linewidth=1.0,
                           linestyle="-", label='PEPGV / NES')
    pepg_line, = plt.plot(pepg_history, color="blue", linewidth=1.0,
                          linestyle="-.", label='PEPG / NES')
    oes_line, = plt.plot(oes_history, color="orange", linewidth=1.0,
                         linestyle="-", label='OpenAI-ES')
    cma_line, = plt.plot(cma_history, color="green", linewidth=1.0,
                         linestyle="-", label='CMA-ES')
    plt.legend(handles=[optimum_line, pepgv_line, pepg_line, oes_line, cma_line],
               loc='best')
    plt.xlim(0, 100)
    plt.xlabel('generation')
    plt.ylabel('loss')
    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
        feed_dict = {self.position: jp, self.velocity: jv}
        dpval = self.sess.run(self.dp, feed_dict)
        return dpval[0]

agent = StandActor()
nbParams = agent.getNumberOfParams()
try:
    x0 = pickle.load(open("params" + ENV_NAME + ".p", "rb"))
except:
    print("no saved parameter found - starting from initialization")
    x0 = agent.getParameterValue()

solver = CMAES(x0)
# agent.loadParams( np.zeros( nbParams ) )

def evaluate(sol):
    agent.loadParams(sol)
    ob = env.reset()
    while True:
        action = agent.act(ob)
        ob, reward, done, _ = env.step(action)
        # time.sleep(0.01)
        if done:
            # print("episode done")
            # print(reward)
            gc.collect()
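The evaluate function above is cut off inside the episode loop and never accumulates or returns a score (note also that estool's CMAES takes a parameter count as its first argument, so CMAES(x0) may belong to a different wrapper). A hedged, self-contained completion that returns the episode's total reward; the accumulation and return convention are assumptions:

import gc

def evaluate_sketch(agent, env, sol):
    """Hypothetical completion of evaluate(): load the candidate weights,
    roll out one episode, and return the cumulative reward."""
    agent.loadParams(sol)
    ob = env.reset()
    total_reward = 0.0
    while True:
        action = agent.act(ob)
        ob, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            gc.collect()
            return total_reward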
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, weight_decay=0.005):
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)
    if optimizer == 'ses':
        ses = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                   sigma_alpha=0.2, sigma_limit=0.02, elite_ratio=0.1,
                   weight_decay=weight_decay, popsize=population)
        es = ses
    elif optimizer == 'ga':
        ga = SimpleGA(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=weight_decay, popsize=population)
        es = ga
    elif optimizer == 'cma':
        cma = CMAES(num_params, sigma_init=sigma_init, popsize=population,
                    weight_decay=weight_decay)
        es = cma
    elif optimizer == 'pepg':
        pepg = PEPG(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_alpha=0.20, sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    weight_decay=weight_decay, popsize=population)
        es = pepg
    elif optimizer == 'oes':
        oes = OpenES(num_params, sigma_init=sigma_init, sigma_decay=sigma_decay,
                     sigma_limit=0.02, learning_rate=0.01, learning_rate_decay=1.0,
                     learning_rate_limit=0.01, antithetic=antithetic,
                     weight_decay=weight_decay, popsize=population)
        es = oes
    # elif optimizer == 'pso':
    #     pso = PSO(num_params, sigma_init=sigma_init,
    #               weight_decay=weight_decay, popsize=population)
    #     es = pso
    elif optimizer == 'global_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='global')
        es = pso
    elif optimizer == 'local_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='local')
        es = pso
    elif optimizer == 'random_pso':
        pso = Pyswarms(num_params, sigma_init=sigma_init, weight_decay=weight_decay,
                       popsize=population, communication_topology='random')
        es = pso
    else:
        if optimizer in list(sorted(ng.optimizers.registry.keys())):
            ng_optimizer = Nevergrad(optimizer, num_params, sigma_init=sigma_init,
                                     popsize=population, weight_decay=weight_decay)
            es = ng_optimizer
        else:
            raise ValueError('Could not find optimizer!')
    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
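The fallback branch accepts any name registered in nevergrad's optimizer registry. A quick way to inspect the valid names, assuming nevergrad is importable as ng just as the snippet does:

import nevergrad as ng

# print a sample of the optimizer names the final branch above would accept
print(sorted(ng.optimizers.registry.keys())[:10])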