Пример #1
0
    # Total environment steps taken so far; counted across all episodes
    # (never reset inside the loop below).
    n_steps = 0

    # Run n_games episodes. learn_iters, score_history, best_score, N and
    # figure_file are expected to be initialised before this point (outside
    # the visible part of the file).
    for i in range(n_games):
        observation = env.reset()
        done = False
        score = 0
        while not done:
            env.render()
            # choose_actions yields three values; presumably the sampled
            # action, its (log-)probability and the critic's value estimate
            # -- TODO confirm against the agent implementation.
            action, prob, val = agent.choose_actions(observation)
            # Unpacks a 4-tuple from env.step.
            observation_, reward, done, info = env.step(action)

            n_steps += 1
            score += reward
            # Store the transition keyed on the observation the action was
            # chosen from (the pre-step observation).
            agent.remember(observation, action, prob, val, reward, done)
            # Trigger a learning update every N environment steps. Because
            # n_steps is global, the update can fall in the middle of an
            # episode.
            if n_steps % N == 0:
                agent.learn()
                learn_iters += 1
            observation = observation_
        score_history.append(score)
        # Running average over (at most) the 100 most recent episode scores.
        avg_score = np.mean(score_history[-100:])

        # Checkpoint whenever the running average improves on the best seen.
        if avg_score > best_score:
            best_score = avg_score
            agent.save_models()

        # Note: the format spec " .1f" uses the space sign flag, so a
        # non-negative average is printed with a leading space.
        print(
            f"episode {i}: score {score:.1f} avg score {avg_score: .1f} time_steps {n_steps} learning_steps {learn_iters}"
        )
    # 1-based episode numbers, passed as the first argument of the plot call.
    x = [i + 1 for i in range(len(score_history))]

    # plot_learning_curve is defined elsewhere; called with the episode
    # numbers, the raw per-episode scores and figure_file.
    plot_learning_curve(x, score_history, figure_file)
Пример #2
0
                    #     have_grad = True
                    # if have_grad:
                    #     observation_ = fgsm_attack(observation_, perturbation, data_grad)

                    # Ask the adversary agent for a gradient. The literal
                    # False is treated as the "no gradient available"
                    # sentinel by the identity check below.
                    data_grad = advAgent.compute_grads()
                    if data_grad is not False:
                        # Replace the next observation with a perturbed
                        # version. fgsm_attack is defined elsewhere;
                        # presumably a Fast Gradient Sign Method attack
                        # -- TODO confirm.
                        observation_ = fgsm_attack(observation_, perturbation, data_grad)

                    n_steps += 1
                    score += reward
                    # Both agents record the pre-step observation. The
                    # adversary additionally records observation_, which at
                    # this point may already be the perturbed version.
                    agent.remember(observation, action, prob, val, reward, done)
                    advAgent.remember(observation, action, reward, observation_, done)
                    
                    # No learning when load_checkpoint is truthy.
                    if not load_checkpoint:
                        # Learn every N environment steps and keep whatever
                        # agent.learn() returns for the averaging below.
                        if n_steps % N == 0:
                            loss.append(agent.learn())
                            learn_iters += 1

                    # The (possibly perturbed) next observation becomes the
                    # current observation for the next iteration.
                    observation = observation_
                
                if not load_checkpoint:
                    # Average the collected learn() results element-wise;
                    # indices 0/1/2 are appended to the actor/critic/total
                    # loss lists respectively.
                    # NOTE(review): if `loss` is empty here (no learn() call
                    # has happened yet), np.mean returns a scalar NaN and the
                    # indexing below would likely fail -- confirm how `loss`
                    # is initialised/reset outside this fragment.
                    avg_loss = np.mean(loss, axis=0)
                    actor_loss.append(avg_loss[0])
                    critic_loss.append(avg_loss[1])
                    total_loss.append(avg_loss[2])

                score_history.append(score)
                # Running average over (at most) the 100 most recent scores.
                avg_score = np.mean(score_history[-100:])
                avg_score_history.append(avg_score)
            
            # Store the score history under the current trial key.
            score_book[trial] = score_history