Exemplo n.º 1
0
import tensorflow as tf
import matplotlib.pyplot as plt
import architectures
import numpy as np
import dynamics as dn
import rl

# Evaluation setup for the dueling DDQN agent: locate the saved model, load the
# stored reward data, build a small generator/load environment and import the
# saved TensorFlow graph definition.

# Checkpoint prefix (".meta" is appended below) and the pickle holding reward data.
model_path = "../models/dueling_ddqn"
reward_path = "cummulative_reward_dueling_ddqn.pickle"

# Presumably the per-episode cumulative reward recorded during training --
# TODO confirm against rl.readData; it is not used in the lines visible here.
reward = rl.readData(reward_path)

# Instances of the environment: one generator node and one load node.
# The set points do not cancel (3.15 - 3.30 = -0.15), so the area starts
# with a non-zero net power set point.
generator = dn.Node(powerSetPoint=3.15)
load = dn.Node(powerSetPoint=-3.30)
# NOTE(review): M and D look like the inertia and damping constants of the
# area model -- confirm in dynamics.Area.
area = dn.Area(frequencySetPoint=50, M=0.1, D=0.0160)
# Called once with every node of the area before any step is taken; presumably
# initialises the frequency deviation later read back from the area -- confirm.
area.calculateDeltaF([generator, load])

# Empty histories of powers and frequencies; presumably filled during the
# rollout that follows.
power = []
frequencies = []

# Start from a clean default graph, then load the graph definition stored
# next to the checkpoint.
tf.reset_default_graph()
# NOTE: despite its name, `graph` holds the return value of
# tf.train.import_meta_graph, which is a Saver (used to restore the
# variables), not a tf.Graph.
graph = tf.train.import_meta_graph(model_path + ".meta")

# Presumably the number of simulation steps in the evaluation rollout.
steps = 100

with tf.Session() as session:
Exemplo n.º 2
0
with tf.Session() as session:
    session.run(init)

    # Iterate all the episodes
    for i in range(episodes):
        print("\nEPISODE: ", i)

        # Record the cumulative reward accumulated so far, then reset it for this episode
        cumm_r_list.append(cumm_r)
        cumm_r = 0

        # Store the experience from the episode
        episodeBuffer = []

        # Instances of the environment
        generator_1 = dn.Node(powerSetPoint=1.5)
        generator_2 = dn.Node(powerSetPoint=1.5)
        load = dn.Node(powerSetPoint=-3.0 + (-0.25 + np.random.rand() / 2))
        area = dn.Area(frequencySetPoint=50, M=0.1, D=0.0160)
        area.calculateDeltaF([generator_1, generator_2, load])

        # Initial state for the LSTM
        state_1 = (np.zeros([1, h_size]), np.zeros([1, h_size]))
        state_2 = (np.zeros([1, h_size]), np.zeros([1, h_size]))

        # Iterate all over the steps
        for j in range(steps):

            # Get the action from the actor and the internal state of the rnn
            current_f = area.getDeltaF()
Exemplo n.º 3
0
buffer = rl.experience_buffer()

# Launch the learning
with tf.Session() as session:
    session.run(init)
    
    # Iterate all the episodes
    for i in range(episodes):
        print("\nEPISODE: ",i)
        
        # Record the cumulative reward accumulated so far, then reset it for this episode
        cumm_r_list.append(cumm_r)
        cumm_r = 0
        
        # Instances of the environment
        generator = dn.Node(powerSetPoint=3.15)
        load = dn.Node(powerSetPoint=-3.15+ (-0.25+np.random.rand()/2))
        area = dn.Area(frequencySetPoint=50,M=1,D=0)
        area.calculateDeltaF([generator,load])
        
        # Iterate all over the steps
        for j in range(steps):
            
            # Choose the greedy action and the Q values
            current_f = area.getDeltaF()
            a,Q_values = session.run([mainNet.predict,mainNet.Qout],
                                     feed_dict={mainNet.inputs:np.array(current_f).reshape(1,1)})
            a = a[0]
            
            # Explore if epsilon parameter agrees
            if np.random.rand() < epsilon:
Exemplo n.º 4
0
import tensorflow as tf
import matplotlib.pyplot as plt
import architectures
import numpy as np
import dynamics as dn
import rl

# Evaluation setup for the MADDPG agents: locate the saved model, load the
# stored reward data, build a two-generator/one-load environment and import
# the saved TensorFlow graph definition.

# Checkpoint prefix (".meta" is appended below) and the pickle holding reward data.
model_path = "../models/maddpg"
reward_path = "cummulative_reward_maddpg.pickle"

# Presumably the per-episode cumulative reward recorded during training --
# TODO confirm against rl.readData; it is not used in the lines visible here.
reward = rl.readData(reward_path)

# Instances of the environment: two generator nodes and one load node.
# The set points do not cancel (1.5 + 1.5 - 3.15 = -0.15), so the area starts
# with a non-zero net power set point.
generator_1 = dn.Node(powerSetPoint=1.5)
generator_2 = dn.Node(powerSetPoint=1.5)
load = dn.Node(powerSetPoint=-3.15)
# NOTE(review): M and D look like the inertia and damping constants of the
# area model -- confirm in dynamics.Area.
area = dn.Area(frequencySetPoint=50,M=0.1,D=0.0160)
# Called once with every node of the area before any step is taken; presumably
# initialises the frequency deviation later read back from the area -- confirm.
area.calculateDeltaF([generator_1,generator_2,load])

# Empty histories: one power list per generator plus the frequencies;
# presumably filled during the rollout that follows.
power_1 = []
power_2 = []
frequencies = []

# Start from a clean default graph, then load the graph definition stored
# next to the checkpoint.
tf.reset_default_graph()
# NOTE: despite its name, `graph` holds the return value of
# tf.train.import_meta_graph, which is a Saver (used to restore the
# variables), not a tf.Graph.
graph = tf.train.import_meta_graph(model_path+".meta")

# Presumably the number of simulation steps in the evaluation rollout.
steps = 100