import numpy as np

n_steps = 0
learn_iters = 0
score_history = []
best_score = -np.inf

for i in range(n_games):
    observation = env.reset()
    done = False
    score = 0
    while not done:
        env.render()
        action, prob, val = agent.choose_actions(observation)
        observation_, reward, done, info = env.step(action)
        n_steps += 1
        score += reward
        agent.remember(observation, action, prob, val, reward, done)
        # Run a learning step every N environment steps.
        if n_steps % N == 0:
            agent.learn()
            learn_iters += 1
        observation = observation_
    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    # Checkpoint the models whenever the 100-episode average improves.
    if avg_score > best_score:
        best_score = avg_score
        agent.save_models()
    print(
        f"episode {i}: score {score:.1f} avg score {avg_score:.1f} "
        f"time_steps {n_steps} learning_steps {learn_iters}"
    )

x = [i + 1 for i in range(len(score_history))]
plot_learning_curve(x, score_history, figure_file)
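For reference, a plausible sketch of the `plot_learning_curve` helper called above, written under the assumption that it plots a trailing 100-episode running average with matplotlib (the same window used for `avg_score`); the project's actual helper may differ:

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve(x, scores, figure_file):
    # Smooth raw episode scores with a trailing 100-episode average,
    # matching the avg_score computed in the training loop.
    running_avg = [np.mean(scores[max(0, t - 99):t + 1]) for t in range(len(scores))]
    plt.plot(x, running_avg)
    plt.title("Running average of previous 100 scores")
    plt.savefig(figure_file)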
        # Inside the per-step `while not done:` loop of the adversarial run.
        # Ask the adversary for input gradients; if any are available,
        # perturb the next observation with an FGSM attack before storing it.
        data_grad = advAgent.compute_grads()
        if data_grad is not False:
            observation_ = fgsm_attack(observation_, perturbation, data_grad)
        n_steps += 1
        score += reward
        agent.remember(observation, action, prob, val, reward, done)
        advAgent.remember(observation, action, reward, observation_, done)
        if not load_checkpoint:
            # agent.learn() returns (actor_loss, critic_loss, total_loss).
            if n_steps % N == 0:
                loss.append(agent.learn())
                learn_iters += 1
        observation = observation_
    # End of episode: record the average actor/critic/total losses and scores.
    if not load_checkpoint:
        avg_loss = np.mean(loss, axis=0)
        actor_loss.append(avg_loss[0])
        critic_loss.append(avg_loss[1])
        total_loss.append(avg_loss[2])
    score_history.append(score)
    avg_score = np.mean(score_history[-100:])
    avg_score_history.append(avg_score)
score_book[trial] = score_history
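A minimal sketch of what the `fgsm_attack` helper could look like, assuming NumPy observations and that `perturbation` plays the role of the FGSM step size epsilon; the project's actual helper may additionally clip the result to the environment's valid observation range:

import numpy as np

def fgsm_attack(observation, epsilon, data_grad):
    # FGSM: step the input in the sign of the gradient, which (to first
    # order) maximally increases the adversary's loss.
    return observation + epsilon * np.sign(data_grad)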