def render(click_reset, click_training, cost_factor, risk_factor,
           levels_cooling, lr, n_episodes, type_agent):
    print("Reset ", click_reset, " - ", reset_clicks.count)
    print("Train ", click_training, " - ", train_clicks.count)

    # The reset button was clicked: rebuild the environment with the new parameters
    if click_reset > reset_clicks.count:
        reset_clicks.count = click_reset
        env.__init__(levels_cooling=levels_cooling, risk_factor=risk_factor,
                     cost_factor=cost_factor, keep_cooling=True)

    # The training button was clicked: train the agent and keep the resulting cooling state
    elif click_training > train_clicks.count:
        train_clicks.count = click_training
        env_temp, agent, rewards = run_n_episodes(env, n_episodes=n_episodes,
                                                  lr=lr, type_agent=type_agent)
        utils.plot_average_running_rewards(
            rewards, "C:/Users/talvesdacosta/Desktop/results.png")
        # os.system("start " + "C:/Users/talvesdacosta/Desktop/results.png")
        env.cooling = env_temp.cooling

    # Otherwise only the reward parameters changed: update them in place
    else:
        env.risk_factor = risk_factor
        env.cost_factor = cost_factor

    return env.render(with_plotly=True)
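# ------------------------------------------------------------------
# The signature above (n_clicks counters, slider-style parameters,
# env.render(with_plotly=True)) suggests this render function is used as a
# Dash callback. Below is a minimal sketch of how such a callback could be
# wired, assuming Dash 2.x; the component ids, the ClickCounter helper and
# the two-button layout are hypothetical, not the project's actual layout.
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.graph_objects as go

class ClickCounter:
    """Remembers the last seen n_clicks so the callback can tell which button fired."""
    def __init__(self):
        self.count = 0

reset_clicks = ClickCounter()
train_clicks = ClickCounter()

app = dash.Dash(__name__)
app.layout = html.Div([
    html.Button("Reset environment", id="btn-reset", n_clicks=0),
    html.Button("Train agent", id="btn-train", n_clicks=0),
    dcc.Graph(id="env-view"),
])

@app.callback(Output("env-view", "figure"),
              [Input("btn-reset", "n_clicks"), Input("btn-train", "n_clicks")])
def render_view(click_reset, click_training):
    if click_reset > reset_clicks.count:
        reset_clicks.count = click_reset
        # env.__init__(...) would be called here in the real app
    elif click_training > train_clicks.count:
        train_clicks.count = click_training
        # run_n_episodes(...) would be called here in the real app
    return go.Figure()  # placeholder for env.render(with_plotly=True)

if __name__ == "__main__":
    app.run(debug=True)  # app.run_server(debug=True) on older Dash versions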
                       r, np.expand_dims(s_next, axis=0),
                       np.expand_dims(s_before, axis=0), done)

        # Go to the next state
        s_before = s
        s = s_next

        # If the episode is terminated
        if done:
            print("Episode {}/{} finished after {} timesteps - epsilon : {:.2}".format(
                i_episode + 1, N_EPISODES, i_step, agent.epsilon))
            break

    # -----------------------------------------
    # Store the rewards
    rewards.append(i_step)

    # Training
    agent.train(batch_size=BATCH_SIZE)

# Plot the average running rewards
utils.plot_average_running_rewards(rewards)
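# ------------------------------------------------------------------
# utils.plot_average_running_rewards is called in both snippets but its body
# is not shown here. Below is a minimal sketch of what such a helper could
# look like, assuming only numpy and matplotlib; the `window` parameter and
# the optional `filepath` argument are assumptions, chosen so that both call
# sites above (with and without an output path) would work.
import numpy as np
import matplotlib.pyplot as plt

def plot_average_running_rewards(rewards, filepath=None, window=50):
    """Plot raw episode rewards together with their running average."""
    rewards = np.asarray(rewards, dtype=float)
    # Running mean over the last `window` episodes (shorter windows at the start)
    running = np.array([rewards[max(0, i - window + 1): i + 1].mean()
                        for i in range(len(rewards))])
    plt.figure(figsize=(10, 4))
    plt.plot(rewards, alpha=0.4, label="episode reward")
    plt.plot(running, label="running average ({} episodes)".format(window))
    plt.xlabel("episode")
    plt.ylabel("reward")
    plt.legend()
    if filepath is not None:
        # The Dash callback passes an output path; the training loop does not
        plt.savefig(filepath)
    plt.show()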