# NOTE: assumes the project-specific classes and helpers
# (TwoAreaPowerSystemEnv, DdpgController, ClassicalPiController, StepSignal,
# capture_agent_progress) are imported from the project's own modules.
import torch
import matplotlib.pyplot as plt


def main():

    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(15, 'on', 0.01, 'off', 0.0)

    # Load the actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise lists (seeded with an initial zero) to store the simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        action = agent.act(state, add_noise=False)

        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])
        time_list.append(env.t)

        if done:
            break

    print('Score: {}'.format(score))

    # Save the agent performance plot (PNG) and the raw data (pickle)
    png_plot_file_path = './test_plots/ddpg_test_plot/pngplot/zz_plot_final.png'
    pik_file_path = './test_plots/ddpg_test_plot/pickledata/zz_plot_final.pkl'
    capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path)
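The helper capture_agent_progress is not listed in these examples. Below is a minimal sketch of what it might do, inferred from the call signature above; the plot layout, the comments, and the pickled dictionary keys are assumptions, not the project's actual implementation.

# Hypothetical sketch -- not the project's actual implementation.
import pickle

import matplotlib.pyplot as plt


def capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path):
    """Save a summary plot (PNG) and the raw simulation traces (pickle)."""
    fig, axes = plt.subplots(4, 1, sharex=True)
    axes[0].plot(time_list, out_s_1, label='area 1')   # area outputs
    axes[0].plot(time_list, out_s_2, label='area 2')
    axes[0].legend()
    axes[1].plot(time_list, out_tieline)                # tie-line power flow
    axes[2].plot(time_list, control_s_1)                # control signals
    axes[2].plot(time_list, control_s_2)
    axes[3].plot(time_list, demand_list)                # area 1 power demand
    fig.savefig(png_plot_file_path)
    plt.close(fig)

    # pickle the raw traces so the plots can be regenerated later
    with open(pik_file_path, 'wb') as f:
        pickle.dump({'t': time_list, 'out_s_1': out_s_1, 'out_s_2': out_s_2,
                     'control_s_1': control_s_1, 'control_s_2': control_s_2,
                     'out_tieline': out_tieline, 'demand': demand_list}, f)
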
def main():

    # spin up environment
    env = TwoAreaPowerSystemEnv()
    env.seed(2)
    state = env.reset()

    # spin up agent
    agent = DdpgController(state_size=7, action_size=2, random_seed=2)

    # spin up the power demand signal
    signal = StepSignal()
    signal.reset(1, 'on', 0.01, 'off', 0.0)

    # Load the actor and critic networks
    agent.actor_local.load_state_dict(torch.load('checkpoint_actor.pth'))
    agent.critic_local.load_state_dict(torch.load('checkpoint_critic.pth'))

    # initialise lists (seeded with an initial zero) to store the simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        action = agent.act(state, add_noise=False)

        demand = (signal.del_p_L_1_func(env.t), signal.del_p_L_2_func(env.t))

        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        time_list.append(env.t)
        control_s_1.append(action[0])
        control_s_2.append(action[1])
        demand_list.append(demand[0])

        if done:
            break

    print('Score: {}'.format(score))

    # system outputs for areas 1 and 2 (state[2] and state[6])
    plt.subplot(411)
    plt.plot(time_list, out_s_1)
    plt.plot(time_list, out_s_2)

    # area 1 control signal
    plt.subplot(412)
    plt.plot(time_list, control_s_1)

    # area 2 control signal
    plt.subplot(413)
    plt.plot(time_list, control_s_2)

    # area 1 power demand
    plt.subplot(414)
    plt.plot(time_list, demand_list)

    plt.show()
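To run either of these main() examples directly as a script, a standard entry-point guard can be added at module level. This is a usage sketch, not part of the original examples.

if __name__ == '__main__':
    main()
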
def agent_test(mag, dur):

    ####################################
    # Create the environment
    ####################################
    # spin up environment
    env = TwoAreaPowerSystemEnv()

    # reset the environment
    state = env.reset()
    ####################################

    # View the action space and the state space
    #print("Action space: {}".format(env.action_space))
    #print("State space: {}".format(env.observation_space))

    ####################################
    # Create the controller
    ####################################
    # instantiate the controller
    agent = ClassicalPiController()

    # reset the controller
    action = agent.reset()
    ####################################

    ####################################
    # Create the signal
    ####################################
    # instantiate the step demand signal
    signal = StepSignal()
    signal.reset(dur, 'on', mag, 'off', 0.0)
    ####################################

    # initialise lists (seeded with an initial zero) to store the simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        # Obtain the current demand
        demand = (
            signal.del_p_L_1_func(env.t),  # power demand for area 1
            signal.del_p_L_2_func(env.t))  # power demand for area 2

        # Step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        demand_list.append(demand[0])
        time_list.append(env.t)

        # Given the current state observation take an action
        action = agent.act(state, (env.t, env.t + env.t_delta))

        control_s_1.append(action[0])
        control_s_2.append(action[1])

        # rescale the action before the next environment step
        # (the 0.02 divisor is undocumented here; presumably a gain or time-step normalisation)
        action /= 0.02

        if done:
            break

    print('Score: {}'.format(score))

    if mag < 0:
        fp = 'neg'
    else:
        fp = 'pos'

    png_plot_file_path = './test_plots/pi_test_plot/pngplot/{}_{}_plot_final.png'.format(
        fp, dur)
    pik_file_path = './test_plots/pi_test_plot/pickledata/{}_{}_plot_final.pkl'.format(
        fp, dur)
    capture_agent_progress(time_list, out_s_1, out_s_2, control_s_1,
                           control_s_2, out_tieline, demand_list,
                           png_plot_file_path, pik_file_path)
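Since agent_test takes a step magnitude and duration, a sweep over several disturbance scenarios can be scripted as below. The magnitudes and durations shown are illustrative values, not the project's test matrix.

# illustrative sweep over step magnitudes (pu) and durations (s)
for mag in (0.01, -0.01):
    for dur in (1, 15):
        agent_test(mag, dur)
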
def main():

    ####################################
    # Create the environment
    ####################################
    # spin up environment
    env = TwoAreaPowerSystemEnv()

    # reset the environment
    state = env.reset()
    ####################################

    # View the action space and the state space
    print("Action space: {}".format(env.action_space))
    print("State space: {}".format(env.observation_space))

    ####################################
    # Create the controller
    ####################################
    # instantiate the controller
    agent = ClassicalPiController()

    # reset the controller
    action = agent.reset()
    ####################################

    ####################################
    # Create the signal
    ####################################
    # instantiate the step demand signal
    signal = StepSignal()
    signal.reset(1, 'on', 0.01, 'off', 0.0)
    ####################################

    # initialise lists (seeded with an initial zero) to store the simulation output
    out_s_1 = [0]
    out_s_2 = [0]
    out_tieline = [0]
    control_s_1 = [0]
    control_s_2 = [0]
    demand_list = [0]
    time_list = [0]

    score = 0

    while True:

        # Obtain the current demand
        demand = (
            signal.del_p_L_1_func(env.t),  # power demand for area 1
            signal.del_p_L_2_func(env.t))  # power demand for area 2

        # Step the environment forward by one step
        state, reward, done, _ = env.step(action, demand)

        score += reward

        out_s_1.append(state[2])
        out_s_2.append(state[6])
        out_tieline.append(state[3])
        demand_list.append(demand[0])
        time_list.append(env.t)

        # Given the current state observation take an action
        action = agent.act(state, (env.t, env.t + env.t_delta))

        control_s_1.append(action[0])
        control_s_2.append(action[1])

        if done:
            break

    print('Score: {}'.format(score))

    # system outputs for areas 1 and 2 (state[2] and state[6])
    plt.subplot(511)
    plt.plot(time_list, out_s_1)
    plt.plot(time_list, out_s_2)

    # tie-line power flow
    plt.subplot(512)
    plt.plot(time_list, out_tieline)

    # area 1 control signal
    plt.subplot(513)
    plt.plot(time_list, control_s_1)

    # area 2 control signal
    plt.subplot(514)
    plt.plot(time_list, control_s_2)

    # area 1 power demand
    plt.subplot(515)
    plt.plot(time_list, demand_list)

    plt.show()
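The StepSignal class itself is not listed here. From the way it is used above (reset(duration, 'on', magnitude, 'off', 0.0) followed by del_p_L_1_func(t) and del_p_L_2_func(t)), a minimal sketch could look like the following; the attribute names and the exact step shape are assumptions.

class StepSignal:
    """Hypothetical sketch of a step load-demand signal for the two areas."""

    def reset(self, duration, area_1_status, area_1_mag, area_2_status, area_2_mag):
        # assumed meaning: each area's step is either 'on' (applied for
        # `duration` seconds) or 'off' (always zero)
        self.duration = duration
        self.area_1_mag = area_1_mag if area_1_status == 'on' else 0.0
        self.area_2_mag = area_2_mag if area_2_status == 'on' else 0.0

    def del_p_L_1_func(self, t):
        # area 1 load change at time t
        return self.area_1_mag if t <= self.duration else 0.0

    def del_p_L_2_func(self, t):
        # area 2 load change at time t
        return self.area_2_mag if t <= self.duration else 0.0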