from rl.agents.dqn import DQNAgent from rl.policy import MaxBoltzmannQPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy from rl.memory import SequentialMemory from gym.wrappers import FlattenObservation import gym_electric_motor as gem from gym_electric_motor.visualization import MotorDashboard from gym_electric_motor.reference_generators import WienerProcessReferenceGenerator if __name__ == '__main__': # Run the option parsing # Get the environment and extract the number of actions. env = gem.make( 'emotor-dc-series-disc-v1', state_filter=['i'], # Pass an instance visualization=MotorDashboard(visu_period=0.5, plotted_variables=['omega', 'i', 'u']), converter='Disc-4QC', # Take standard class and pass parameters (Load) a=0, b=.1, c=1.1, j_load=0.4, # Pass a string (with extra parameters) ode_solver='euler', solver_kwargs={}, # Pass a Class with extra parameters reference_generator=WienerProcessReferenceGenerator( reference_state='i', sigma_range=(5e-3, 5e-1))) nb_actions = env.action_space.n env = FlattenObservation(env) model = Sequential()
# Define the reward function and to which state variable it applies # Here, we define it for current control reward_function=WeightedSumOfErrors(observed_states='i'), # Defines which numerical solver is to be used for the simulation # euler is fastest but not most precise ode_solver='euler', solver_kwargs={}, # Define and parameterize the reference generator for the current reference reference_generator=WienerProcessReferenceGenerator( reference_state='i', sigma_range=(3e-3, 3e-2)), # Defines which variables to plot via the builtin dashboard monitor visualization=MotorDashboard(state_plots=['i', 'omega']), ) # Now, the environment will output states and references separately state, ref = env.reset() # For data processing we sometimes want to flatten the env output, # which means that the env will only output one array that contains states and references consecutively env = FlattenObservation(env) obs = env.reset() # Read the number of possible actions for the given env # this allows us to define a proper learning agent for this task nb_actions = env.action_space.n window_length = 1
>>> cd examples >>> python pi_series_omega_control.py """ from agents.simple_controllers import Controller import time import sys, os sys.path.append(os.path.abspath(os.path.join('..'))) import gym_electric_motor as gem from gym_electric_motor.reference_generators import * from gym_electric_motor.visualization import MotorDashboard if __name__ == '__main__': env = gem.make( 'emotor-dc-series-cont-v1', # Pass an instance visualization=MotorDashboard(plotted_variables=['all'], visu_period=1), # Take standard class and pass parameters (Load) load_parameter=dict(a=0.01, b=.1, c=0.1, j_load=.05), # Pass a string (with extra parameters) ode_solver='euler', solver_kwargs={}, # Pass a Class with extra parameters reference_generator=MultiReferenceGenerator( sub_generators=[ SinusoidalReferenceGenerator, WienerProcessReferenceGenerator(), StepReferenceGenerator() ], p=[0.1, 0.8, 0.1],
# initializer for a ranom initial speed with uniform distribution within the interval omega=60 to omega=80 uniform_init = { 'random_init': 'uniform', 'interval': [[60, 80]], 'states': { 'omega': 0 } } # initializer for a specific speed load_init = {'states': {'omega': 20}} if __name__ == '__main__': env = gem.make( 'DcSeriesCont-v1', visualization=MotorDashboard(plots=['omega', 'i'], dark_mode=False), motor_parameter=dict(j_rotor=0.001), load_parameter=dict(a=0, b=0.1, c=0, j_load=0.001), ode_solver='scipy.solve_ivp', solver_kwargs=dict(), reference_generator=rg.SwitchedReferenceGenerator( sub_generators=[ rg.SinusoidalReferenceGenerator(reference_state='omega'), rg.WienerProcessReferenceGenerator(reference_state='omega'), rg.StepReferenceGenerator(reference_state='omega') ], p=[0.2, 0.6, 0.2], super_episode_length=(1000, 10000)), # Pass the predefined initializers motor_initializer=gaussian_init,
# usage of a random load initializer is only recommended for the # ConstantSpeedLoad, due to the already given profile by an ExternalSpeedLoad load_init = {'random_init': 'uniform'}, # External speed profiles can be given by an ExternalSpeedLoad, # inital value is given by bias of the profile sampling_time = 1e-4 if __name__ == '__main__': # Create the environment env = gem.make( 'DcSeriesCont-v1', ode_solver='scipy.solve_ivp', solver_kwargs=dict(), tau=sampling_time, reference_generator=const_switch_gen, visualization=MotorDashboard(state_plots=['omega', 'i'], reward_plot=True), # define which load to use, feel free to try these examples: # using ExternalSpeedLoad: #load=ExternalSpeedLoad(speed_profile=sinus_lambda, tau=sampling_time, amplitude=10, frequency=2, bias=4) load=ExternalSpeedLoad(speed_profile=saw_lambda, tau=sampling_time, amplitude=40, frequency=5, bias=40) #load=ExternalSpeedLoad(speed_profile=constant_lambda, value=3, load_initializer={'random_init': 'uniform', 'interval': [[20, 80]]}) #(ExternalSpeedLoad will not use the passed initializer and print a corresponding warning instead) # using ConstantSpeedLoad (this class also accepts an initializer): #load=ConstantSpeedLoad(omega_fixed=42, load_initializer={'random_init': 'uniform', 'interval': [[20, 80]]}) ) # After the setup is done, we are ready to simulate the environment # We make use of a standard PI current controller
# Set the DC-link supply voltage u_sup=400, # Pass the previously defined motor parameters motor_parameter=motor_parameter, # Pass the updated motor limits and nominal values limit_values=limit_values, nominal_values=nominal_values, # Define which states will be shown in the state observation (what we can "measure") state_filter=['i_sd', 'i_sq', 'epsilon'], # Defines which variables to plot via the builtin dashboard monitor visualization=MotorDashboard( plots=['i_sd', 'i_sq', 'action_0', 'action_1', 'mean_reward']), visu_period=1, ) # Now we apply the wrapper defined at the beginning of this script env = AppendLastActionWrapper(env) # We flatten the observation (append the reference vector to the state vector such that # the environment will output just a single vector with both information) # This is necessary for compatibility with kerasRL2 env = FlattenObservation(env) # Read the dimension of the action space # this allows us to define a proper learning agent for this task nb_actions = env.action_space.shape[0]
from gym.wrappers import FlattenObservation import sys, os sys.path.append(os.path.abspath(os.path.join('..'))) import gym_electric_motor as gem from gym_electric_motor.reward_functions import WeightedSumOfErrors from gym_electric_motor.visualization import MotorDashboard from gym_electric_motor.reference_generators import WienerProcessReferenceGenerator if __name__ == '__main__': env = gem.make( 'emotor-dc-series-disc-v1', state_filter=['omega', 'i'], # Pass an instance reward_function=WeightedSumOfErrors(observed_states='i'), visualization=MotorDashboard(update_period=1e-2, visu_period=1e-1, plotted_variables=['omega', 'i', 'u']), converter='Disc-1QC', # Take standard class and pass parameters (Load) motor_parameter=dict(r_a=15e-3, r_e=15e-3, l_a=1e-3, l_e=1e-3), load_parameter=dict(a=0, b=.1, c=.1, j_load=0.04), # Pass a string (with extra parameters) ode_solver='euler', solver_kwargs={}, # Pass a Class with extra parameters reference_generator=WienerProcessReferenceGenerator(reference_state='i', sigma_range=(3e-3, 3e-2)) ) env = FlattenObservation(env) nb_actions = env.action_space.n window_length = 1 model = Sequential()
Controlled Quantity: 'omega' Limitations: Physical limitations of the motor will be Current. Converter : FourQuadrantConverter from converters.py """ if __name__ == '__main__': """ Continuous mode: The action is the average (normalized) voltage per time step which is assumed to be transferred to a PWM/SVM converter to generate the switching sequence. Discrete mode: The action is the switching state of the power converter i.e., a quantity from a discrete set of switching states which can be generated by the converter. """ env = gem.make( 'DcPermExDisc-v1', visualization=MotorDashboard( state_plots=['omega', 'torque', 'i', 'u', 'u_sup']), ode_solver='scipy.solve_ivp', solver_kwargs=dict(), reference_generator=rg.SwitchedReferenceGenerator( sub_generators=[ rg.SinusoidalReferenceGenerator, rg.WienerProcessReferenceGenerator(), rg.StepReferenceGenerator() ], p=[0.1, 0.8, 0.1], super_episode_length=(1000, 10000)), # Override the observation of the current limit of 'i' which is selected per default # The 'on_off' Controller cannot comply to any limits. constraints=()) # Assign a simple on/off controller
Controlled Quantity: 'omega' Limitations: Physical limitations of the motor will be Current. Converter : FourQuadrantConverter from converters.py """ if __name__ == '__main__': """ Continuous mode: The action is the average (normalized) voltage per time step which is assumed to be transferred to a PWM/SVM converter to generate the switching sequence. Discrete mode: The action is the switching state of the power converter i.e., a quantity from a discrete set of switching states which can be generated by the converter. """ env = gem.make('DcPermExDisc-v1', visualization=MotorDashboard( plots=['omega', 'torque', 'i', 'u', 'u_sup'], visu_period=1), ode_solver='scipy.solve_ivp', solver_kwargs=dict(), reference_generator=rg.SwitchedReferenceGenerator( sub_generators=[ rg.SinusoidalReferenceGenerator, rg.WienerProcessReferenceGenerator(), rg.StepReferenceGenerator() ], p=[0.1, 0.8, 0.1], super_episode_length=(1000, 10000))) # Assign a simple on/off controller controller = Controller.make('on_off', env) state, reference = env.reset()
p=3, j_rotor=0.06) # Change the motor operational limits (important when limit violations can terminate and reset the environment) limit_values = dict(i=160 * 1.41, omega=12000 * np.pi / 30, u=450) # Change the motor nominal values nominal_values = {key: 0.7 * limit for key, limit in limit_values.items()} # Create the environment env = gem.make( # Choose the permanent magnet synchronous motor with continuous-control-set 'PMSMCont-v1', # Pass a class with extra parameters visualization=MotorDashboard(state_plots=['i_sq', 'i_sd'], action_plots='all', reward_plot=True, episodic_plots=[MeanEpisodeRewardPlot()]), # Set the mechanical load to have constant speed load=ConstantSpeedLoad(omega_fixed=1000 * np.pi / 30), # Set the frame of control inputs, dq is the flux oriented coordinate system control_space='dq', # Define which numerical solver is to be used for the simulation ode_solver='scipy.solve_ivp', solver_kwargs={}, # Pass the previously defined reference generator reference_generator=rg, # Set weighting of different addends of the reward function
def set_env(time_limit=True, gamma=0.99, N=0, M=0, training=True, callbacks=[]): # define motor arguments motor_parameter = dict( p=3, # [p] = 1, nb of pole pairs r_s=17.932e-3, # [r_s] = Ohm, stator resistance l_d=0.37e-3, # [l_d] = H, d-axis inductance l_q=1.2e-3, # [l_q] = H, q-axis inductance psi_p=65.65e-3, # [psi_p] = Vs, magnetic flux of the permanent magnet ) u_sup = 350 nominal_values = dict(omega=4000 * 2 * np.pi / 60, i=230, u=u_sup) limit_values = dict(omega=4000 * 2 * np.pi / 60, i=1.5 * 230, u=u_sup) q_generator = WienerProcessReferenceGenerator(reference_state='i_sq') d_generator = WienerProcessReferenceGenerator(reference_state='i_sd') rg = MultipleReferenceGenerator([q_generator, d_generator]) tau = 1e-5 max_eps_steps = 10000 if training: motor_initializer = { 'random_init': 'uniform', 'interval': [[-230, 230], [-230, 230], [-np.pi, np.pi]] } # motor_initializer={'random_init': 'gaussian'} reward_function = gem.reward_functions.WeightedSumOfErrors( observed_states=['i_sq', 'i_sd'], reward_weights={ 'i_sq': 10, 'i_sd': 10 }, constraint_monitor=SqdCurrentMonitor(), gamma=gamma, reward_power=1) else: motor_initializer = {'random_init': 'gaussian'} reward_function = gem.reward_functions.WeightedSumOfErrors( observed_states=['i_sq', 'i_sd'], reward_weights={ 'i_sq': 0.5, 'i_sd': 0.5 }, # comparable reward constraint_monitor=SqdCurrentMonitor(), gamma=0.99, # comparable reward reward_power=1) # creating gem environment env = gem.make( # define a PMSM with discrete action space "PMSMDisc-v1", # visualize the results visualization=MotorDashboard(plots=['i_sq', 'i_sd', 'reward']), # parameterize the PMSM and update limitations motor_parameter=motor_parameter, limit_values=limit_values, nominal_values=nominal_values, # define the random initialisation for load and motor load='ConstSpeedLoad', load_initializer={ 'random_init': 'uniform', }, motor_initializer=motor_initializer, reward_function=reward_function, # define the duration of one sampling step tau=tau, u_sup=u_sup, # turn off terminations via limit violation, parameterize the rew-fct reference_generator=rg, ode_solver='euler', callbacks=callbacks, ) # appling wrappers and modifying environment env.action_space = Discrete(7) eps_idx = env.physical_system.state_names.index('epsilon') i_sd_idx = env.physical_system.state_names.index('i_sd') i_sq_idx = env.physical_system.state_names.index('i_sq') if time_limit: gem_env = TimeLimit( AppendNLastActionsWrapper( AppendNLastOberservationsWrapper( EpsilonWrapper(FlattenObservation(env), eps_idx, i_sd_idx, i_sq_idx), N), M), max_eps_steps) else: gem_env = AppendNLastActionsWrapper( AppendNLastOberservationsWrapper( EpsilonWrapper(FlattenObservation(env), eps_idx, i_sd_idx, i_sq_idx), N), M) return gem_env
# Here we use a square root function reward_power=0.5, # Define which state variables are to be monitored concerning limit violations # None means that a limit violation will never trigger an env.reset observed_states=None, # Define which numerical solver is to be used for the simulation ode_solver='scipy.solve_ivp', solver_kwargs=dict(method='BDF'), # Define and parameterize the reference generator for the speed reference reference_generator=WienerProcessReferenceGenerator(reference_state='omega', sigma_range=(5e-3, 1e-2)), # Defines which variables to plot via the builtin dashboard monitor visualization=MotorDashboard(['omega', 'torque', 'i', 'u', 'u_sup', 'reward']), visu_period=1, ) # For data processing we want to flatten the env output, # which means that the env will only output one array that contains states and references consecutively state, ref = env.reset() env = FlattenObservation(env) obs = env.reset() # Read the dimension of the action space # this allows us to define a proper learning agent for this task nb_actions = env.action_space.shape[0] # CAUTION: Do not use layers that behave differently in training and testing # (e.g. dropout, batch-normalization, etc..) # Reason is a bug in TF2 where not the learning_phase_tensor is extractable
if __name__ == '__main__': """ Continuous mode: The action is the average (normalized) voltage per time step which is assumed to be transferred to a PWM converter to generate the switching sequence. Discrete mode: The action is the switching state of the power converter i.e., a quantity from a discrete set of switching states which can be generated by the converter. """ env = gem.make( 'DcSeriesCont-v1', # replace with 'DcSeriesDisc-v1' for discrete controllers state_filter=['omega', 'i'], # Pass an instance visualization=MotorDashboard( state_plots=['i', 'omega'], reward_plot=True, action_plots='all', additional_plots=[CumulativeConstraintViolationPlot(), MeanEpisodeRewardPlot()] ), # Take standard class and pass parameters (Load) motor_parameter=dict(r_a=15e-3, r_e=15e-3, l_a=100e-3, l_e=100e-3), load_parameter=dict(a=0, b=.1, c=.1, j_load=0.04), # Pass a string (with extra parameters) ode_solver='euler', solver_kwargs={}, # Pass a Class with extra parameters reference_generator=rg.WienerProcessReferenceGenerator() ) # Assign a PI-controller to the speed control problem controller = Controller.make('pi_controller', env) state, reference = env.reset() # get the system time to measure program duration