def main():

    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(15, 'on', 0.01, 'off', 0.0)

    # Load the actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise empty list to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        action = agent.act(state, add_noise=False)

        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])
        time_list.append(env.t)

        if done:
            break

    print('Score: {}'.format(score))

    # Plot the agent performance png files
    png_plot_file_path = './test_plots/ddpg_test_plot/pngplot/zz_plot_final.png'
    pik_file_path = './test_plots/ddpg_test_plot/pickledata/zz_plot_final.pkl'
    capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path)
Exemplo n.º 2
0
def main():

    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)

    # spin up ddpg agent
    agent_ddpg = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up pi agent
    agent_pi = ClassicalPiController()

    # bundle agents
    agent_list = [agent_ddpg, agent_pi]

    ####################################################
    # COMMENT OUT IF NOT CONTINUING TRAINING
    ####################################################
    # Load the actor and critic networks
    #agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    #agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # spin up the power demand signal
    signal = StepSignal()

    # train the agent
    scores = ddpg_train(env, agent_list, signal)
def main():

    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(1, 'on', 0.01, 'off', 0.0)

    # Load the actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise empty list to store simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        action = agent.act(state, add_noise=False)

        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        time_list.append(env.t)
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])

        if done:
            break

    print('Score: {}'.format(score))

    plt.subplot(411)
    plt.plot(time_list, out_s_1)
    plt.plot(time_list, out_s_2)

    plt.subplot(412)
    plt.plot(time_list, control_s_1)

    plt.subplot(413)
    plt.plot(time_list, control_s_2)

    plt.subplot(414)
    plt.plot(time_list, demand_list)

    plt.show()