import tensorflow as tf
import matplotlib.pyplot as plt
import architectures
import numpy as np
import dynamics as dn
import rl

# Model and reward to test algorithms
model_path = "../models/dueling_ddqn"
reward_path = "cummulative_reward_dueling_ddqn.pickle"
reward = rl.readData(reward_path)

# Instances of the environment
generator = dn.Node(powerSetPoint=3.15)
load = dn.Node(powerSetPoint=-3.30)
area = dn.Area(frequencySetPoint=50, M=0.1, D=0.0160)
area.calculateDeltaF([generator, load])

# Define list of powers and frequencies
power = []
frequencies = []

# Let's tensorflow this
tf.reset_default_graph()
graph = tf.train.import_meta_graph(model_path + ".meta")
steps = 100

with tf.Session() as session:
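# Hedged sketch, not taken from the source: one plausible way the evaluation
# session could continue. The tensor names "main/inputs:0" and "main/predict:0",
# and the way a greedy action is mapped back onto the generator set point, are
# assumptions about how the graph in architectures.py was built and exported.
    graph.restore(session, model_path)  # import_meta_graph returns a tf.train.Saver
    g = tf.get_default_graph()
    inputs = g.get_tensor_by_name("main/inputs:0")    # assumed name
    predict = g.get_tensor_by_name("main/predict:0")  # assumed name

    for j in range(steps):
        current_f = area.getDeltaF()
        frequencies.append(current_f)
        a = session.run(predict,
                        feed_dict={inputs: np.array(current_f).reshape(1, 1)})[0]
        power.append(a)
        # Map the greedy action index to a power correction and apply it to the
        # generator here (the exact setter on dn.Node is not shown in this
        # excerpt), then recompute the area frequency:
        area.calculateDeltaF([generator, load])

# Inspect the stored learning curve and the evaluated frequency trace
plt.figure()
plt.plot(reward)
plt.title("Cumulative reward per episode (dueling DDQN)")
plt.figure()
plt.plot(frequencies)
plt.title("Frequency deviation during evaluation")
plt.show()
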
with tf.Session() as session:
    session.run(init)
    # Iterate over all the episodes
    for i in range(episodes):
        print("\nEPISODE: ", i)

        # Store cumulative reward per episode
        cumm_r_list.append(cumm_r)
        cumm_r = 0

        # Store the experience from the episode
        episodeBuffer = []

        # Instances of the environment
        generator_1 = dn.Node(powerSetPoint=1.5)
        generator_2 = dn.Node(powerSetPoint=1.5)
        load = dn.Node(powerSetPoint=-3.0 + (-0.25 + np.random.rand() / 2))
        area = dn.Area(frequencySetPoint=50, M=0.1, D=0.0160)
        area.calculateDeltaF([generator_1, generator_2, load])

        # Initial state for the LSTM
        state_1 = (np.zeros([1, h_size]), np.zeros([1, h_size]))
        state_2 = (np.zeros([1, h_size]), np.zeros([1, h_size]))

        # Iterate over the steps
        for j in range(steps):
            # Get the action from the actor and the internal state of the rnn
            current_f = area.getDeltaF()
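            # Hedged sketch, not from the source: how the recurrent actor step
            # might look for the two agents. The objects actor_1/actor_2 and
            # their attributes (.action, .rnn_state, .inputs, .state_in) are
            # assumptions about the networks defined earlier via architectures.py.
            a_1, state_1 = session.run(
                [actor_1.action, actor_1.rnn_state],
                feed_dict={actor_1.inputs: np.array(current_f).reshape(1, 1),
                           actor_1.state_in: state_1})
            a_2, state_2 = session.run(
                [actor_2.action, actor_2.rnn_state],
                feed_dict={actor_2.inputs: np.array(current_f).reshape(1, 1),
                           actor_2.state_in: state_2})
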
buffer = rl.experience_buffer()

# Launch the learning
with tf.Session() as session:
    session.run(init)
    # Iterate over all the episodes
    for i in range(episodes):
        print("\nEPISODE: ", i)

        # Store cumulative reward per episode
        cumm_r_list.append(cumm_r)
        cumm_r = 0

        # Instances of the environment
        generator = dn.Node(powerSetPoint=3.15)
        load = dn.Node(powerSetPoint=-3.15 + (-0.25 + np.random.rand() / 2))
        area = dn.Area(frequencySetPoint=50, M=1, D=0)
        area.calculateDeltaF([generator, load])

        # Iterate over the steps
        for j in range(steps):
            # Choose the greedy action and the Q values
            current_f = area.getDeltaF()
            a, Q_values = session.run([mainNet.predict, mainNet.Qout],
                                      feed_dict={mainNet.inputs: np.array(current_f).reshape(1, 1)})
            a = a[0]

            # Explore with probability epsilon
            if np.random.rand() < epsilon:
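                # Hedged sketch, not from the source: the usual epsilon-greedy
                # branch. n_actions (the size of the discrete action space) is
                # assumed to be defined earlier in the script.
                a = np.random.randint(0, n_actions)  # override the greedy choice with a random action
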
import tensorflow as tf
import matplotlib.pyplot as plt
import architectures
import numpy as np
import dynamics as dn
import rl

# Model and reward to test algorithms
model_path = "../models/maddpg"
reward_path = "cummulative_reward_maddpg.pickle"
reward = rl.readData(reward_path)

# Instances of the environment
generator_1 = dn.Node(powerSetPoint=1.5)
generator_2 = dn.Node(powerSetPoint=1.5)
load = dn.Node(powerSetPoint=-3.15)
area = dn.Area(frequencySetPoint=50, M=0.1, D=0.0160)
area.calculateDeltaF([generator_1, generator_2, load])

# Define list of powers and frequencies
power_1 = []
power_2 = []
frequencies = []

# Let's tensorflow this
tf.reset_default_graph()
graph = tf.train.import_meta_graph(model_path + ".meta")
steps = 100
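# Hedged sketch, not from the source: an evaluation session analogous to the
# single-agent test above, with one restored actor per generator. The tensor
# names ("actor_1/inputs:0", "actor_1/action:0", and their actor_2 counterparts)
# are assumptions about how the MADDPG graph was exported, and the mapping from
# actor output to generator set point is not shown in this excerpt.
with tf.Session() as session:
    graph.restore(session, model_path)  # import_meta_graph returns a tf.train.Saver
    g = tf.get_default_graph()
    in_1 = g.get_tensor_by_name("actor_1/inputs:0")
    act_1 = g.get_tensor_by_name("actor_1/action:0")
    in_2 = g.get_tensor_by_name("actor_2/inputs:0")
    act_2 = g.get_tensor_by_name("actor_2/action:0")

    for j in range(steps):
        current_f = area.getDeltaF()
        frequencies.append(current_f)
        obs = np.array(current_f).reshape(1, 1)
        a_1 = session.run(act_1, feed_dict={in_1: obs})
        a_2 = session.run(act_2, feed_dict={in_2: obs})
        power_1.append(float(a_1))
        power_2.append(float(a_2))
        # Apply both power corrections to their generators (the exact setter on
        # dn.Node is not shown in this excerpt), then recompute the frequency:
        area.calculateDeltaF([generator_1, generator_2, load])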