import numpy as np
import tensorflow as tf
import tflearn

from CSTR_model import MimoCstr
# ActorNetwork, CriticNetwork, OrnsteinUhlenbeckActionNoise and train() are
# assumed to be defined elsewhere in this repo.


def main(args):
    """
    Environment used in this code is Pendulum-v0 from OpenAI gym.
    States: cos(theta), sin(theta), theta_dt
    Actions: force application between -2 and 2
    Reward: -(theta^2 + 0.1*theta_dt^2 + 0.001*action^2)
    Objective: pendulum is vertical, with 0 movement.
    Initialization: starts at a random angle and a random velocity.
    End: after all the steps are exhausted.

    NOTE: the active environment below is the MimoCstr CSTR model; the gym
    environment is left commented out.
    """
    with tf.Session() as sess:

        # Create the environment
        env = MimoCstr(nsim=args['max_episode_len'])
        # env = gym.make(args['env'])

        # Set all the random seeds for the random packages
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))
        tflearn.init_graph(seed=args['random_seed'])

        # Define the state and action dimensions, and the bound of the action
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        # Ensure the action bound is symmetric
        assert (env.action_space.high == -env.action_space.low)

        # Initialize the actor and critic
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        # Initialize the saver after the networks so their variables exist;
        # creating tf.train.Saver() before any variables raises an error.
        saver = tf.train.Saver()

        # Restore old model
        # saver.restore(sess, args['ckpt_dir'])

        # Initialize Ornstein-Uhlenbeck exploration noise
        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        # Train the actor-critic model
        replay_buffer, action_list = train(sess, env, args, actor, critic,
                                           actor_noise)

        # Save the model
        saver.save(sess, args['ckpt_dir'])

    return actor, critic, env, replay_buffer, action_list
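
# A minimal sketch of how main() might be invoked. Only the key names are
# taken from the function above; every value here is an illustrative
# assumption, not a tuned hyperparameter from this repo.
if __name__ == '__main__':
    args = {'env': 'Pendulum-v0',        # only used by the commented-out gym path
            'max_episode_len': 50,
            'random_seed': 1234,
            'actor_lr': 1e-4,
            'critic_lr': 1e-3,
            'tau': 1e-3,
            'gamma': 0.99,
            'minibatch_size': 64,
            'ckpt_dir': './checkpoints/ddpg'}
    actor, critic, env, replay_buffer, action_list = main(args)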
import numpy as np

from CSTR_model import MimoCstr
# ModelPredictiveControl is assumed to be importable from a local module in
# this repo.


def simulation():

    # Plant model
    model_plant = MimoCstr(nsim=50)

    # Build the controller model: state vector doubled (nx = 2 * Nx) to carry
    # the disturbance states used for offset-free control
    model_control = MimoCstr(nsim=model_plant.Nsim, nx=model_plant.Nx * 2,
                             xs=np.array([0.878, 324.5, 0.659, 0, 0, 0]),
                             x0=np.array([1, 310, 0.659, 0, 0, 0]),
                             control=True)

    # MPC object initiation
    control = ModelPredictiveControl(model_control.Nsim, 10, model_control.Nx,
                                     model_control.Nu, 0.1, 0.1, 0.1,
                                     model_control.xs, model_control.us,
                                     dist=True)

    # MPC construction
    mpc_control = control.get_mpc_controller(model_control.cstr_ode,
                                             control.eval_time,
                                             model_control.x0,
                                             random_guess=False)

    """
    Simulation portion
    """

    for t in range(model_plant.Nsim):

        # Solve the MPC optimization problem; obtain the current input and
        # the predicted state
        model_control.u[t, :], model_control.x[t + 1, :] = control.solve_mpc(
            model_plant.x, model_plant.xsp, mpc_control, t, control.p)

        # Calculate the next states for the plant
        model_plant.x[t + 1, :] = model_plant.next_state(model_plant.x[t, :],
                                                         model_control.u[t, :])

        # Update the p parameters for offset-free control
        control.p = model_plant.x[t + 1, :] - model_control.x[t + 1, 0:3]

    print(model_plant.cost_function())

    return model_plant, model_control, control
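
# Hypothetical entry point (not in the original file): run the offset-free
# MPC simulation once and keep the returned objects for inspection.
if __name__ == '__main__':
    model_plant, model_control, control = simulation()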
import numpy as np

from CSTR_model import MimoCstr
# ModelPredictiveControl is assumed to be importable from a local module in
# this repo.


def simulation():

    # MPC evaluation period
    eval_period = 5

    # Model initiation
    model = MimoCstr(nsim=50, k0=8.2e10)

    # MPC initiation
    mpc_init = ModelPredictiveControl(10, model.Nx, model.Nu, 0.1, 0.1, 0.1,
                                      model.xs, model.us)

    # MPC construction
    mpc_control = mpc_init.get_mpc_controller(model.cstr_ode, eval_period,
                                              model.x0, random=False,
                                              verbosity=0)

    # Output disturbance estimate and disturbance-corrected state trajectory
    output_disturb = np.zeros(model.Nx)
    x_corrected = np.zeros([model.Nsim + 1, model.Nx])

    """
    Simulation portion
    """

    for t in range(model.Nsim):

        """
        Disturbance
        """
        # if t == 10:
        #     model.disturbance()

        """
        MPC evaluation
        """
        if t % eval_period == 0 and t != 0:
            # Solve the MPC optimization problem
            mpc_init.solve_mpc(model.x, model.u, model.xsp, mpc_control, t)
            if t != 0:
                output_disturb = model.xs - model.x[t, :]
        elif t < 5:
            # Fixed input during the start-up period
            model.u[t, :] = [300, 0.1]
        else:
            # Hold the previous input between MPC evaluations
            model.u[t, :] = model.u[t - 1, :]

        # Calculate the next states
        model.x[t + 1, :] = model.next_state(model.x[t, :], model.u[t, :])
        x_corrected[t + 1, :] = model.x[t + 1, :] + output_disturb

    print(model.cost_function(x_corrected))

    return model, mpc_init, x_corrected
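
# Hypothetical entry point (not in the original file): run the simulation and
# plot the disturbance-corrected reactor temperature (state index 1, per the
# steady state used elsewhere in this repo).
if __name__ == '__main__':
    import matplotlib.pyplot as plt

    model, mpc_init, x_corrected = simulation()
    plt.plot(x_corrected[:, 1])
    plt.xlabel('Time step')
    plt.ylabel('Corrected reactor temperature')
    plt.show()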
import os

import gym
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

from CSTR_model import MimoCstr

os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'

model = MimoCstr(nsim=50)
action_list = [-1, 1]


def reward_calc(temp, temp_sp):
    """Shaped reward: bonus inside a +/-0.1% band around the setpoint,
    quadratic penalty outside it."""
    rewards = 0
    if temp_sp * 0.999 < temp < temp_sp * 1.001:
        rewards = rewards + 15 - abs(temp - temp_sp) * 20
    else:
        rewards = rewards - np.power(temp - temp_sp, 2)
    return rewards


# Policy network dimensions
num_inputs = 3
num_hidden = 4
num_output = 1  # prob to go "left"; P(right) = 1 - P(left)

initializer = tf.contrib.layers.variance_scaling_initializer()

X = tf.placeholder(tf.float32, shape=[None, num_inputs])
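
# Hypothetical continuation (not the author's code): one plausible way to
# finish the graph from the placeholders above, following the common
# single-hidden-layer policy-gradient pattern for a two-action choice.
hidden = tf.layers.dense(X, num_hidden, activation=tf.nn.elu,
                         kernel_initializer=initializer)
logits = tf.layers.dense(hidden, num_output,
                         kernel_initializer=initializer)
left_prob = tf.nn.sigmoid(logits)                # P(action_list[0])
action_probs = tf.concat([left_prob, 1 - left_prob], axis=1)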
import os

import numpy as np

from CSTR_model import MimoCstr
# ReinforceLearning is assumed to be defined elsewhere in this repo.

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def reward_calc(temp, temp_sp):
    """Shaped reward: bonus inside a +/-0.3% band around the setpoint,
    quadratic penalty outside it."""
    rewards = 0
    if temp_sp * 0.997 < temp < temp_sp * 1.003:
        rewards = rewards + 15 - abs(temp - temp_sp) * 20
    else:
        rewards = rewards - np.power(temp - temp_sp, 2)
    return rewards


model = MimoCstr(delta=1, nsim=500)

rl = ReinforceLearning(discount_factor=0.97, states_start=300,
                       states_stop=340, states_interval=0.5,
                       actions_start=-15, actions_stop=15,
                       actions_interval=2.5, learning_rate=0.1,
                       epsilon=0.1, doe=0, eval_period=5)

states = np.zeros([75])
states[0:15] = np.arange(290, 310, 20 / 15)
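
# Quick check of the shaped reward band (illustrative values; the 324.5 K
# setpoint is the steady-state temperature used elsewhere in this repo):
print(reward_calc(324.6, 324.5))   # inside the +/-0.3% band -> ~13.0
print(reward_calc(320.0, 324.5))   # outside the band -> -20.25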