def render(click_reset, click_training, cost_factor, risk_factor,
           levels_cooling, lr, n_episodes, type_agent):

    print("Reset ", click_reset, " - ", reset_clicks.count)
    print("Train ", click_training, " - ", train_clicks.count)

    # New click on the reset button: re-initialize the environment in place
    if click_reset > reset_clicks.count:
        reset_clicks.count = click_reset
        env.__init__(levels_cooling=levels_cooling,
                     risk_factor=risk_factor,
                     cost_factor=cost_factor,
                     keep_cooling=True)

    # New click on the training button: run the training episodes
    elif click_training > train_clicks.count:
        train_clicks.count = click_training
        env_temp, agent, rewards = run_n_episodes(env,
                                                  n_episodes=n_episodes,
                                                  lr=lr,
                                                  type_agent=type_agent)
        utils.plot_average_running_rewards(
            rewards, "C:/Users/talvesdacosta/Desktop/results.png")
        # os.system("start "+"C:/Users/talvesdacosta/Desktop/results.png")
        # Copy the trained environment's cooling state back to the live env
        env.cooling = env_temp.cooling
    # No new click: simply update the environment parameters
    else:
        env.risk_factor = risk_factor
        env.cost_factor = cost_factor

    return env.render(with_plotly=True)
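
# For context: a minimal sketch of how a callback like render() is usually
# registered with a Dash app. The app and every component id below are
# assumptions for illustration; none of them appear in the original example.
import dash
from dash.dependencies import Input, Output

app = dash.Dash(__name__)

# The positional Inputs map one-to-one onto render()'s arguments
app.callback(
    Output("env-figure", "figure"),
    [Input("reset-button", "n_clicks"),
     Input("train-button", "n_clicks"),
     Input("cost-factor", "value"),
     Input("risk-factor", "value"),
     Input("levels-cooling", "value"),
     Input("lr", "value"),
     Input("n-episodes", "value"),
     Input("type-agent", "value")],
)(render)  # same effect as decorating render with @app.callback(...)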
Example #2
                r,
                np.expand_dims(s_next, axis=0),
                np.expand_dims(s_before, axis=0),
                done)
                
            # Go to the next state
            s_before = s
            s = s_next
            
            # If the episode is terminated
            if done:
                print("Episode {}/{} finished after {} timesteps - epsilon : {:.2}".format(i_episode+1,N_EPISODES,i_step,agent.epsilon))
                break


        #-----------------------------------------

        # Store the episode length (i_step) as the episode reward
        rewards.append(i_step)

        # Train the agent on a batch of stored transitions
        agent.train(batch_size=BATCH_SIZE)

    # Plot the average running rewards
    utils.plot_average_running_rewards(rewards)
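
# A hedged sketch of the replay-memory pattern the loop above relies on:
# transitions are stored step by step, then train() samples a random
# minibatch. The class below is an assumption for illustration only; it is
# not the agent implementation used in the example.
import random
from collections import deque


class ReplayMemory:
    def __init__(self, capacity=2000):
        # Fixed-size buffer: the oldest transitions are dropped once full
        self.buffer = deque(maxlen=capacity)

    def remember(self, state, action, reward, next_state, done):
        # Store one (s, a, r, s', done) transition per environment step
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # Uniformly sample a minibatch for one training step
        return random.sample(self.buffer, min(batch_size, len(self.buffer)))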