Exemplo n.º 1
0
thetas_archive = Archive(max_size=n_train)

# Sampler wired to both the sample archive and the theta archive.
sampler = Samplers.BasicSampler(sample_archive, thetas_archive)

# Training: one ask / evaluate / tell cycle per generation.
for i in range(n_train):

    batch = optimizer.ask(pop_size)
    scores = np.zeros(pop_size)

    # Evaluate every freshly drawn candidate and archive the result,
    # tagged with the generation it was drawn in.
    for j in range(pop_size):
        candidate = batch[j]
        nn.set_params(candidate)
        score = env.eval(nn, render=False)
        sample_archive.add_sample(Sample(candidate, score, [curr_gen]))
        scores[j] = score

    # Report the best and mean score of the current batch.
    best_in_pop = np.max(scores)
    mean_in_pop = np.mean(scores)
    print("Best/Average score in pop {0}: {1:.2f}, {2:.2f}".format(
        i, best_in_pop, mean_in_pop))

    # Feed the scores back to the optimizer and move to the next generation.
    optimizer.tell(batch, scores)
    curr_gen += 1

# Replay the best parameters found, rendering each test episode.
best_params = optimizer.best_param()
nn.set_params(best_params)
for i in range(n_test):
    score = env.eval(nn, render=True)
Exemplo n.º 2
0
    # Environment to evaluate on; its action and observation spaces are
    # passed to the individual's constructor below.
    env = Env("CartPole-v0")
    act_space = env.get_action_space()
    obs_space = env.get_obs_space()

    # optimization stuff
    # Build the individual and record its initial parameters and their
    # evaluation score as the starting "best" values.
    nn = FFIndiv(obs_space, act_space, hidden_size=2)
    best_params = nn.get_params()
    best_score = env.eval(nn, render=False)
    # The optimizer is sized by the first dimension of the parameter array.
    optimizer = ES.CMAES(nn.get_params().shape[0], pop_size=pop_size)

    # archives
    # sample_archive is capped at n_train * pop_size entries and
    # thetas_archive at n_train (presumably one sample per evaluation and
    # one theta per training iteration -- confirm against the loop body).
    sample_archive = Archive(max_size=n_train * pop_size)
    thetas_archive = Archive(max_size=n_train)
    # Seed the theta archive with the optimizer's current distribution
    # parameters; the third Theta argument is an empty list here.
    mu, cov = optimizer.get_distrib_params()
    thetas_archive.add_sample(Theta(mu, cov, []))

    # sampler
    # Sampler constructed over both archives with the configured accept_ratio.
    sampler = Samplers.ClosestSampler(sample_archive,
                                      thetas_archive,
                                      accept_ratio=accept_ratio)

    print("Problem dimension:", mu.shape[0])
    # Empty DataFrame with one column per tracked statistic; rows are
    # presumably appended during training (not visible here -- confirm).
    df = pd.DataFrame(columns=[
        "n_reused", "best_score", "average_score", "sample_time",
        "evaluation_time"
    ])
    # training
    for i in range(n_train):