示例#1
0
文件: train.py 项目: wakeupppp/astool
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Fill in the module-level training state and construct the solver.

    Assigns the globals ``population``, ``filebase``, ``game``, ``model``,
    ``num_params``, ``es``, ``PRECISION``, ``SOLUTION_PACKET_SIZE`` and
    ``RESULT_PACKET_SIZE``.  Which solver ends up in ``es`` is decided by the
    module-level ``optimizer`` string; any value other than ``'ses'``,
    ``'ga'``, ``'cma'`` or ``'pepg'`` selects ``OpenES``.

    Args:
        sigma_init: Passed to every solver as ``sigma_init``.
        sigma_decay: Passed as ``sigma_decay`` to every solver except CMAES.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE

    population = num_worker * num_worker_trial
    # Log prefix: log/<game>.<optimizer>.<num_episode>.<population>
    filebase = '.'.join(
        ['log/' + gamename, optimizer, str(num_episode), str(population)])

    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init, sigma_decay=sigma_decay,
                  sigma_alpha=0.2, sigma_limit=0.02,
                  elite_ratio=0.1, weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init, sigma_decay=sigma_decay,
                      sigma_limit=0.02, elite_ratio=0.1,
                      weight_decay=0.005, popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init, sigma_decay=sigma_decay,
                  sigma_alpha=0.20, sigma_limit=0.02,
                  learning_rate=0.01, learning_rate_decay=1.0,
                  learning_rate_limit=0.01, weight_decay=0.005,
                  popsize=population)
    else:
        # Every other optimizer name ends up here.
        es = OpenES(num_params,
                    sigma_init=sigma_init, sigma_decay=sigma_decay,
                    sigma_limit=0.02, learning_rate=0.01,
                    learning_rate_decay=1.0, learning_rate_limit=0.01,
                    antithetic=antithetic, weight_decay=0.005,
                    popsize=population)

    PRECISION = 10000
    # Packet sizes scale with the number of trials each worker runs.
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
示例#2
0
def debugRun():
    """Run a PEPG solver through ``debug_solver`` and pickle its history.

    Calls ``config()``, prints the fitness of one random starting point,
    builds a PEPG solver from the module-level ``NPARAMS`` / ``NPOPULATION``
    settings, and writes the array built from ``debug_solver``'s return value
    to ``pepg_rose.pickle`` in the current working directory.
    """
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    pepg = PEPG(
        NPARAMS,
        sigma_init=0.5,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        average_baseline=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False)

    history = np.array(debug_solver(pepg))
    print(history.shape)

    # The context manager closes the file even if pickle.dump raises; the
    # previous open()/close() pair leaked the handle in that case.
    with open("pepg_rose.pickle", "wb") as pickle_out:
        pickle.dump(history, pickle_out)
示例#3
0
# Run each solver through test_solver and keep what it returns.
# `ga` is constructed earlier in the script.
ga_history = test_solver(ga)

# CMA-ES
cmaes = CMAES(NPARAMS, popsize=NPOPULATION, weight_decay=0.0, sigma_init=0.5)
cma_history = test_solver(cmaes)

# PEPG
pepg = PEPG(
    NPARAMS,
    sigma_init=0.5,
    learning_rate=0.1,
    learning_rate_decay=1.0,
    popsize=NPOPULATION,
    average_baseline=False,
    weight_decay=0.00,
    rank_fitness=False,
    forget_best=False,
)
pepg_history = test_solver(pepg)


oes = OpenES(NPARAMS,                  # number of model parameters
            sigma_init=0.5,            # initial standard deviation
            sigma_decay=0.999,         # don't anneal standard deviation
            learning_rate=0.1,         # learning rate for standard deviation
            learning_rate_decay = 1.0, # annealing the learning rate
            popsize=NPOPULATION,       # population size
            antithetic=False,          # whether to use antithetic sampling
            weight_decay=0.00,         # weight decay coefficient
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999, init_opt=''):
  """Set up the module-level training state and obtain the solver.

  Assigns the globals ``population``, ``filebase``, ``controller_filebase``,
  ``model``, ``num_params``, ``es``, ``PRECISION``, ``SOLUTION_PACKET_SIZE``
  and ``RESULT_PACKET_SIZE``.

  Args:
    sigma_init: Passed to a newly built solver as ``sigma_init``.
    sigma_decay: Passed as ``sigma_decay`` to every new solver except CMAES.
    init_opt: Path of a pickled solver to resume from.  When empty, a new
      solver is built according to the module-level ``optimizer`` string
      (any value other than 'ses', 'ga', 'cma' or 'pepg' selects OpenES).
  """
  global population, filebase, controller_filebase, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
  population = num_worker * num_worker_trial
  filebase = './log/'+env_name+'.'+optimizer+'.'+str(num_episode)+'.'+str(population)
  controller_filebase = './controller/'+env_name+'.'+optimizer+'.'+str(num_episode)+'.'+str(population)

  model = make_model()

  num_params = model.param_count
  #print("size of model", num_params)

  if init_opt:
    # NOTE(review): unpickling can execute arbitrary code -- only pass
    # solver files that come from a trusted source.
    # The context manager closes the file; the previous
    # pickle.load(open(...)) never did.
    with open(init_opt, 'rb') as solver_file:
      es = pickle.load(solver_file)
  elif optimizer == 'ses':
    es = PEPG(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_alpha=0.2,
      sigma_limit=0.02,
      elite_ratio=0.1,
      weight_decay=0.005,
      popsize=population)
  elif optimizer == 'ga':
    es = SimpleGA(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_limit=0.02,
      elite_ratio=0.1,
      weight_decay=0.005,
      popsize=population)
  elif optimizer == 'cma':
    es = CMAES(num_params,
      sigma_init=sigma_init,
      popsize=population)
  elif optimizer == 'pepg':
    es = PEPG(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_alpha=0.20,
      sigma_limit=0.02,
      learning_rate=0.01,
      learning_rate_decay=1.0,
      learning_rate_limit=0.01,
      weight_decay=0.005,
      popsize=population)
  else:
    es = OpenES(num_params,
      sigma_init=sigma_init,
      sigma_decay=sigma_decay,
      sigma_limit=0.02,
      learning_rate=0.01,
      learning_rate_decay=1.0,
      learning_rate_limit=0.01,
      antithetic=antithetic,
      weight_decay=0.005,
      popsize=population)

  PRECISION = 10000
  SOLUTION_PACKET_SIZE = (4+num_params)*num_worker_trial
  RESULT_PACKET_SIZE = 4*num_worker_trial
示例#5
0
def train_controller_pepg():
    """Evolve controller parameters with PEPG on a Sonic retro environment.

    Builds the environment, the ConvVAE, the LSTM and the Controller (loading
    weights for each one when its file exists), restores the solver's ``mu``
    and ``sigma`` from a saved ``.npz`` checkpoint, and then runs the
    ask / evaluate / tell loop.  After every generation the solver state and
    the controller ``state_dict`` are written under ``weights/``.

    Raises:
        Whatever ``np.load`` raises when the hard-coded solver checkpoint is
        missing or unreadable; unlike the model weights, that file is loaded
        unconditionally.
    """
    # Other environment names tried by the original author:
    # "SonicTheHedgehog-Genesis", "SonicAndKnuckles-Genesis",
    # "SonicTheHedgehog3-Genesis", "SonicAndKnuckles3-Genesis".
    env_name = "SonicTheHedgehog2-Genesis"

    env = retro.make(env_name)
    # Per the original author's notes: observation_space is Box(224, 320, 3)
    # and action_space is MultiBinary(12).

    conv_vae_filename = "weights/conv_vae_SonicAndKnuckles.pkl"  # 3, 4608
    lstm_mdn_filename = "weights/lstm_mdn_SonicAndKnuckles.pkl"  # 4608
    controller_filename = "weights/controller_6656_12.pkl"

    # Alternative grayscale-edge weights used by the original author:
    # "weights/conv_vae_gray_edges.pkl" (1, 1024),
    # "weights/lstm_mdn_gray_edges.pkl" (1024),
    # "weights/controller_rnn_1024_12.pkl".

    conv_vae = ConvVAE((3, 128, 128), 4608)
    if os.path.exists(conv_vae_filename):
        print("loading conv vae weights")
        conv_vae.load_state_dict(torch.load(conv_vae_filename))

    lstm_mdn = LSTM(vector_size=4608)
    if os.path.exists(lstm_mdn_filename):
        print("loading lstm mdn weights")
        lstm_mdn.load_state_dict(torch.load(lstm_mdn_filename))

    controller = Controller(input_size=6656, action_size=12)
    if os.path.exists(controller_filename):
        print("loading controller weights")
        controller.load_state_dict(torch.load(controller_filename))

    # 79884 = 12 * 6656 weights + 12 biases, per the original author's notes
    # (presumably a single linear layer -- verify against Controller).
    solver = PEPG(num_params=79884,
                  sigma_init=4,
                  elite_ratio=0.25,
                  popsize=100,
                  forget_best=False)
    solver_sigma_mu_weights_filename = "weights/solver_sigma_mu_weights_34_0.30942985.npz"
    print("load sigma mu to solver")
    # Use the NpzFile as a context manager so the archive is closed once the
    # two arrays have been read; it was previously left open.
    with np.load(solver_sigma_mu_weights_filename) as data:
        solver.mu = data["mu"]
        solver.sigma = data["sigma"]

    generations = 40000
    for generation in range(generations):

        # One candidate parameter vector per population member.
        solutions = solver.ask()

        fitness_list = np.zeros(solver.popsize)

        for i in range(solver.popsize):
            fitness_list[i] = evaluate(solutions[i],
                                       conv_vae,
                                       lstm_mdn,
                                       controller,
                                       env,
                                       n_steps=512)
            print(i, fitness_list[i])

        solver.tell(fitness_list)

        # result[0] is the best solution, result[1] the best fitness.
        result = solver.result()

        print(generation, result[1])

        # Checkpoint the solver state; the file name embeds the generation
        # index and the best fitness.
        print("save pepg data")
        solver_sigma_mu_filename = "weights/solver_sigma_mu_weights_%s_%s.npz" % (
            generation, result[1])
        np.savez(solver_sigma_mu_filename,
                 mu=solver.mu,
                 sigma=solver.sigma,
                 weights=result[0])

        # NOTE(review): this function never copies result[0] into the
        # controller, so the saved state_dict is whatever evaluate() left in
        # it -- confirm that is the intended checkpoint.
        print("save controller weights")
        torch.save(controller.state_dict(), controller_filename)
示例#6
0
文件: train.py 项目: hlynurd/wm-norb
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
    """Set up the module-level training state and construct the solver.

    Assigns the globals ``population``, ``filebase``, ``controller``,
    ``num_params``, ``es``, ``PRECISION``, ``SOLUTION_PACKET_SIZE`` and
    ``RESULT_PACKET_SIZE`` and makes sure the log directory
    ``results/<exp_name>/<env_name>/log/`` exists.  The solver stored in
    ``es`` is chosen by the module-level ``optimizer`` string; any value
    other than ``'ses'``, ``'ga'``, ``'cma'`` or ``'pepg'`` selects
    ``OpenES``.

    Args:
        sigma_init: Passed to every solver as ``sigma_init``.
        sigma_decay: Passed as ``sigma_decay`` to every solver except CMAES.
    """
    global population, filebase, game, controller, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filedir = 'results/{}/{}/log/'.format(exp_name, env_name)
    # exist_ok avoids the check-then-create race: if several processes run
    # this at the same time, the old exists()/makedirs() pair could raise
    # FileExistsError in all but one of them.
    os.makedirs(filedir, exist_ok=True)
    filebase = filedir + env_name + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    controller = make_controller(args=config_args)

    num_params = controller.param_count
    print("size of model", num_params)

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=0.005,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=0.005,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=0.005,
                  popsize=population)
    else:
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=0.005,
                    popsize=population)

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial
示例#7
0
def initialize_settings(sigma_init=0.1, sigma_decay=0.9999):
  """Prepare the module-level state for a (novelty-search) training run.

  Creates ``<ROOT>/log``, derives the log-file prefix ``filebase`` from the
  run settings, picks the behaviour-characterisation size ``BC_SIZE`` from
  ``novelty_mode``, loads the model, sets the packet-size constants and
  finally builds the solver named by the module-level ``optimizer`` string
  (any value other than 'ses', 'ga', 'cma' or 'pepg' selects OpenES).

  Args:
    sigma_init: Passed to every solver as ``sigma_init``.
    sigma_decay: Passed as ``sigma_decay`` to every solver except CMAES.
  """
  global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE, model_name, novelty_search, unique_id, novelty_mode, BC_SIZE, ns_mode

  population = num_worker * num_worker_trial

  log_dir = os.path.join(ROOT, 'log')
  os.makedirs(log_dir, exist_ok=True)

  run_name = '.'.join(
    [gamename, optimizer, model_name, str(num_episode), str(population)])
  filebase = os.path.join(log_dir, run_name) + '.' + unique_id
  if novelty_search:
    filebase += '.novelty'

  # Size of the behaviour characterisation carried in each result packet.
  if novelty_mode == 'h':
    BC_SIZE = H_SIZE
  elif novelty_mode == 'z':
    BC_SIZE = Z_SIZE
  elif novelty_mode == 'h_concat':
    BC_SIZE = BC_SEQ_LENGTH * H_SIZE
  elif novelty_mode == 'z_concat':
    BC_SIZE = BC_SEQ_LENGTH * Z_SIZE
  elif novelty_mode == 'a_concat':
    BC_SIZE = BC_SEQ_LENGTH * A_SIZE
  else:
    # Dummy size: not used, because the reward is the distance travelled.
    BC_SIZE = 9

  if novelty_mode:
    filebase += '.' + novelty_mode
  if ns_mode:
    filebase += '.' + ns_mode

  model = make_model(model_name, load_model=True)
  num_params = model.param_count
  print("size of model", num_params)

  PRECISION = 10000
  SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
  RESULT_PACKET_SIZE = (4 + BC_SIZE) * num_worker_trial

  if optimizer == 'ses':
    es = PEPG(num_params,
              sigma_init=sigma_init, sigma_decay=sigma_decay,
              sigma_alpha=0.2, sigma_limit=0.02,
              elite_ratio=0.1, weight_decay=0.005,
              popsize=population)
  elif optimizer == 'ga':
    es = SimpleGA(num_params,
                  sigma_init=sigma_init, sigma_decay=sigma_decay,
                  sigma_limit=0.02, elite_ratio=0.1,
                  weight_decay=0.005, popsize=population)
  elif optimizer == 'cma':
    es = CMAES(num_params, sigma_init=sigma_init, popsize=population)
  elif optimizer == 'pepg':
    es = PEPG(num_params,
              sigma_init=sigma_init, sigma_decay=sigma_decay,
              sigma_alpha=0.20, sigma_limit=0.02,
              learning_rate=0.01, learning_rate_decay=1.0,
              learning_rate_limit=0.01, weight_decay=0.005,
              popsize=population)
  else:
    es = OpenES(num_params,
                sigma_init=sigma_init, sigma_decay=sigma_decay,
                sigma_limit=0.02, learning_rate=0.01,
                learning_rate_decay=1.0, learning_rate_limit=0.01,
                antithetic=antithetic, weight_decay=0.005,
                popsize=population)
示例#8
0
def testRun():
    """Run several solvers through ``test_solver`` and plot their histories.

    Calls ``config()``, prints the fitness of one random starting point, then
    runs PEPG, two ``PEPGVariant`` configurations (``diversity_best`` 0.1 and
    1), OpenES and CMA-ES through ``test_solver``.  The histories are drawn
    on one figure, which is saved to ``./results/rose_<NPARAMS>d.svg``.
    """
    config()
    x = np.random.randn(NPARAMS)
    print("The fitness of initial guess", fit_func(x))

    pepg = PEPG(
        NPARAMS,
        sigma_init=0.5,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        average_baseline=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False)

    pepg_history = test_solver(pepg)

    pepgV = PEPGVariant(
        NPARAMS,
        sigma_init=0.5,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        average_baseline=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False,
        diversity_best=0.1)

    print("-----test PEPG vairant-----")
    pepgv_history = test_solver(pepgV)

    print("---test PEPG variant with different diversity-----")

    pepgV2 = PEPGVariant(
        NPARAMS,
        sigma_init=0.5,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        average_baseline=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False,
        diversity_best=1)

    # NOTE(review): this history is computed but never plotted below --
    # confirm whether a curve for it was intended.
    pepgV2_history = test_solver(pepgV2)

    oes = OpenES(
        NPARAMS,
        sigma_init=0.5,
        sigma_decay=0.999,
        learning_rate=0.1,
        learning_rate_decay=1.0,
        popsize=NPOPULATION,
        antithetic=False,
        weight_decay=0.00,
        rank_fitness=False,
        forget_best=False)

    print("-----test oes--------------")
    oes_history = test_solver(oes)

    cmaes = CMAES(NPARAMS,
                  popsize=NPOPULATION,
                  weight_decay=0.0,
                  sigma_init=0.5)
    cma_history = test_solver(cmaes)

    # Flat reference line at zero, labelled as the global optimum.
    best_history = [0] * MAX_ITERATION
    plt.figure(figsize=(16, 8), dpi=150)

    optimum_line, = plt.plot(best_history,
                             color="black",
                             linewidth=0.5,
                             linestyle="-.",
                             label='Global Optimum')
    pepgv_line, = plt.plot(pepgv_history,
                           color="red",
                           linewidth=1.0,
                           linestyle="-",
                           label='PEPGV / NES')
    pepg_line, = plt.plot(pepg_history,
                          color="blue",
                          linewidth=1.0,
                          linestyle="-.",
                          label='PEPG / NES')
    oes_line, = plt.plot(oes_history,
                         color="orange",
                         linewidth=1.0,
                         linestyle="-",
                         label='OpenAI-ES')
    cma_line, = plt.plot(cma_history,
                         color="green",
                         linewidth=1.0,
                         linestyle="-",
                         label='CMA-ES')

    # cma_line was plotted but left out of the legend, so the green curve
    # had no legend entry; list every plotted line.
    plt.legend(handles=[optimum_line, pepgv_line, pepg_line, oes_line,
                        cma_line],
               loc='best')

    plt.xlim(0, 100)

    plt.xlabel('generation')
    plt.ylabel('loss')

    plt.savefig("./results/rose_" + str(NPARAMS) + "d.svg")
示例#9
0
def initialize_settings(sigma_init=0.1,
                        sigma_decay=0.9999,
                        weight_decay=0.005):
    """Set up the module-level training state and construct the solver.

    Assigns the globals ``population``, ``filebase``, ``game``, ``model``,
    ``num_params``, ``es``, ``PRECISION``, ``SOLUTION_PACKET_SIZE`` and
    ``RESULT_PACKET_SIZE``.  The solver stored in ``es`` is chosen by the
    module-level ``optimizer`` string: one of the built-in names ('ses',
    'ga', 'cma', 'pepg', 'oes'), a Pyswarms topology ('global_pso',
    'local_pso', 'random_pso'), or any key of ``ng.optimizers.registry``.

    Args:
        sigma_init: Passed to every solver as ``sigma_init``.
        sigma_decay: Passed as ``sigma_decay`` to the PEPG, SimpleGA and
            OpenES solvers.
        weight_decay: Passed to every solver as ``weight_decay``.

    Raises:
        ValueError: If ``optimizer`` matches none of the names above.
    """
    global population, filebase, game, model, num_params, es, PRECISION, SOLUTION_PACKET_SIZE, RESULT_PACKET_SIZE
    population = num_worker * num_worker_trial
    filebase = 'log/' + gamename + '.' + optimizer + '.' + str(
        num_episode) + '.' + str(population)
    game = config.games[gamename]
    model = make_model(game)
    num_params = model.param_count
    print("size of model", num_params)

    # The three PSO optimizers differ only in their communication topology.
    pso_topologies = {
        'global_pso': 'global',
        'local_pso': 'local',
        'random_pso': 'random',
    }

    if optimizer == 'ses':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.2,
                  sigma_limit=0.02,
                  elite_ratio=0.1,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'ga':
        es = SimpleGA(num_params,
                      sigma_init=sigma_init,
                      sigma_decay=sigma_decay,
                      sigma_limit=0.02,
                      elite_ratio=0.1,
                      weight_decay=weight_decay,
                      popsize=population)
    elif optimizer == 'cma':
        es = CMAES(num_params,
                   sigma_init=sigma_init,
                   popsize=population,
                   weight_decay=weight_decay)
    elif optimizer == 'pepg':
        es = PEPG(num_params,
                  sigma_init=sigma_init,
                  sigma_decay=sigma_decay,
                  sigma_alpha=0.20,
                  sigma_limit=0.02,
                  learning_rate=0.01,
                  learning_rate_decay=1.0,
                  learning_rate_limit=0.01,
                  weight_decay=weight_decay,
                  popsize=population)
    elif optimizer == 'oes':
        es = OpenES(num_params,
                    sigma_init=sigma_init,
                    sigma_decay=sigma_decay,
                    sigma_limit=0.02,
                    learning_rate=0.01,
                    learning_rate_decay=1.0,
                    learning_rate_limit=0.01,
                    antithetic=antithetic,
                    weight_decay=weight_decay,
                    popsize=population)
    elif optimizer in pso_topologies:
        es = Pyswarms(num_params,
                      sigma_init=sigma_init,
                      weight_decay=weight_decay,
                      popsize=population,
                      communication_topology=pso_topologies[optimizer])
    elif optimizer in ng.optimizers.registry:
        # Direct membership test on the registry; no need to sort and copy
        # its keys into a list first.
        es = Nevergrad(optimizer,
                       num_params,
                       sigma_init=sigma_init,
                       popsize=population,
                       weight_decay=weight_decay)
    else:
        raise ValueError('Could not find optimizer!')

    PRECISION = 10000
    SOLUTION_PACKET_SIZE = (5 + num_params) * num_worker_trial
    RESULT_PACKET_SIZE = 4 * num_worker_trial