# Training driver: play n_games episodes, ask the agent to learn on every
# N-th environment step, and save the models whenever the mean score of the
# most recent (up to 100) episodes beats the best mean seen so far.

# Not read inside this block.
figure_file = 'plots/fl.png'

# Baseline for the "best average" comparison: the first entry of the
# environment's reward_range (presumably its minimum reward; confirm).
best_score = env.reward_range[0]
score_history = []

# Counters that persist across episodes.
learn_iters = avg_score = n_steps = 0

for i in range(n_games):
    observation = env.reset()
    done, score = False, 0

    # Roll out one episode until the environment reports it is done.
    while not done:
        action, prob, val = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        n_steps += 1
        score += reward
        agent.remember(observation, action, prob, val, reward, done)

        # n_steps is never reset, so learning fires on every N-th step
        # counted over the whole run, not per episode.
        if n_steps % N == 0:
            agent.learn()
            learn_iters += 1

        observation = observation_

    # Episode finished: update the running average over the last 100 scores.
    score_history.append(score)
    recent_scores = score_history[-100:]
    avg_score = np.mean(recent_scores)

    # Save only on a strict improvement of the running average.
    if avg_score > best_score:
        best_score = avg_score
        agent.save_models()
# Episode loop variant with optional checkpoint loading, rendering and loss
# tracking, stored here as a single whitespace-collapsed line.
#
# What the statements below do, in order:
#   - reset avg_score and n_steps to 0;
#   - if load_checkpoint is truthy, call agent.load_models();
#   - iterate over tqdm(range(n_episodes)); each iteration resets the
#     environment and steps it until `done`, calling env.render(mode='human')
#     first when `render` is truthy;
#   - record every transition with agent.remember(...);
#   - when load_checkpoint is falsy and n_steps % N == 0, append the return
#     value of agent.learn() to `loss` and increment learn_iters;
#   - when load_checkpoint is falsy, take np.mean(loss, axis=0) and append its
#     first entry to actor_loss.
#
# NOTE(review): `loss`, `learn_iters` and `actor_loss` are used but not
# initialised in this fragment; they must be defined earlier in the original
# script.
# NOTE(review): the nesting of the trailing `if not load_checkpoint:` block
# (per-episode inside the `for`, or once after it) cannot be recovered from
# the collapsed text, and the fragment appears to stop mid-way through the
# loss bookkeeping. Confirm against the original file before reformatting.
# NOTE(review): if `loss` is a list and is still empty when np.mean is
# reached, np.mean returns a scalar NaN and avg_loss[0] would raise
# IndexError. Presumably agent.learn() has always run at least once by then;
# verify.
avg_score = 0 n_steps = 0 if load_checkpoint: agent.load_models() for i in tqdm(range(n_episodes)): observation = env.reset() done = False score = 0 while not done: if render: env.render(mode='human') action, prob, val = agent.choose_action(observation) observation_, reward, done, info = env.step(action) n_steps += 1 score += reward agent.remember(observation, action, prob, val, reward, done) if not load_checkpoint: if n_steps % N == 0: loss.append(agent.learn()) learn_iters += 1 observation = observation_ if not load_checkpoint: avg_loss = np.mean(loss, axis=0) actor_loss.append(avg_loss[0])