Exemplo n.º 1
0
    # Training loop: runs n_train iterations. Each iteration asks the sampler
    # for a population of pop_size parameter vectors, reuses the scores of
    # samples the sampler hands back from earlier generations, and evaluates
    # the remaining ones in the environment.
    # NOTE(review): the enclosing function and the tail of this loop are not
    # visible here; `i` is not used in the visible part.
    for i in range(n_train):

        # print(thetas_archive)  (leftover debug output)
        # Start timing the sampling step.
        t_sample = time()
        # ask() returns the batch of parameter vectors, how many of them are
        # reused, and for the reused ones their indices and scores.
        # NOTE(review): the indices presumably refer to sample_archive
        # entries -- confirm against the sampler implementation.
        batch, n_reused, idx_reused, scores_reused = sampler.ask(
            pop_size, optimizer)
        # One score slot per population member, filled in by the loops below.
        scores = np.zeros(pop_size)
        # Elapsed time for sampling (also covers the allocation of `scores`).
        t_sample = time() - t_sample

        # Reused samples occupy the first n_reused slots of the batch: copy
        # their stored score instead of re-evaluating, and record that they
        # also belong to the current generation.
        for j in range(n_reused):

            scores[j] = scores_reused[j]
            sample_archive.add_gen(idx_reused[j], curr_gen)
            thetas_archive[curr_gen].samples.append(idx_reused[j])

        # Newly drawn samples fill the remaining slots [n_reused, pop_size)
        # and must be evaluated. t_eval marks the start of the evaluation
        # phase; where it is stopped is not visible in this fragment.
        t_eval = time()
        for j in range(n_reused, pop_size):

            # Load the candidate parameters into the network and score it
            # with rendering disabled.
            nn.set_params(batch[j])
            score = env.eval(nn, render=False)
            # Archive the new sample tagged with the current generation, and
            # register it under index n_ind for this generation.
            # NOTE(review): n_ind appears to be a running counter matching
            # the position of the sample in sample_archive -- confirm.
            sample_archive.add_sample(Sample(batch[j], score, [curr_gen]))
            thetas_archive[curr_gen].samples.append(n_ind)
            scores[j] = score
            n_ind += 1

            # Track the best parameters seen so far (higher score is better).
            # NOTE(review): the rest of this branch is outside the visible
            # fragment; best_score is presumably updated there.
            if score > best_score:
                best_params = batch[j]