def test_equal_loads(self):
    """
    Same seed must yield identical loads, solar production and rewards,
    even after set_parameters has been applied to one of the two
    environments.
    """
    reference = ActiveEnv(seed=3)
    configured = ActiveEnv(seed=3)
    configured.set_parameters({
        'forecast_horizon': 10,
        'state_space': ['sun', 'demand', 'imbalance']
    })
    for _ in range(5):
        demand_a = reference.powergrid.load['p_mw']
        demand_b = configured.powergrid.load['p_mw']
        assert norm(demand_a - demand_b) < 10e-5

        production_a = reference.powergrid.sgen['p_mw']
        production_b = configured.powergrid.sgen['p_mw']
        assert norm(production_a - production_b) < 10e-5

        # Feed the exact same action to both environments and compare.
        joint_action = reference.action_space.sample()
        _, reward_a, _, _ = reference.step(joint_action)
        _, reward_b, _, _ = configured.step(joint_action)
        assert reward_a == reward_b
def test_error_term_solar(self):
    """
    The solar irradiance forecast must deviate from the realised solar
    production when forecast noise (solar_std) is non-zero.
    """
    env = ActiveEnv(seed=3)
    env.set_parameters({'solar_std': 0.5})
    # Step forward until there is daylight: at night both forecast and
    # production are zero and would be trivially equal.
    while env.get_solar_forecast()[0, 0] < 0.01:
        env.step(env.action_space.sample())
    rated_power = env.powergrid.sgen['sn_mva']
    forecast = rated_power * env.get_solar_forecast()[:, 0]
    produced = -env.powergrid.sgen['p_mw']
    assert norm(forecast - produced) > 0.01
def test_reward_parameters(self):
    """
    Environments configured with different reward terms must report
    different rewards for the same action.
    """
    voltage_env = ActiveEnv(seed=3)
    voltage_env.set_parameters({'reward_terms': ['voltage']})
    current_env = ActiveEnv(seed=3)
    current_env.set_parameters({'reward_terms': ['current']})
    action = voltage_env.action_space.sample()
    voltage_reward = 0
    # Keep stepping both environments in lockstep until a non-zero
    # voltage reward shows up, then compare that step's rewards.
    while voltage_reward == 0:
        _, voltage_reward, _, _ = voltage_env.step(action)
        _, current_reward, _, _ = current_env.step(action)
    assert voltage_reward != current_reward
def test_constant_consumption(self):
    """
    Total forecast demand and total modified demand must agree: the
    flexibility mechanism only shifts consumption in time, it never
    alters the total. This should hold when 'hours' is even and
    force_commitments is enabled.
    """
    env = ActiveEnv(force_commitments=True)
    env.set_parameters({'demand_std': 0})
    hours = 24
    # Request maximal up-regulation of every load in every hour.
    for _ in range(hours):
        env.step(np.ones(len(env.powergrid.load)))
    forecast_total = env.get_scaled_demand_forecast()[:, :hours].sum()
    realised_total = env.resulting_demand[:hours].sum()
    assert np.abs(forecast_total - realised_total) < 10e-6
def load_env(model_name='flexible_load_first', seed=9):
    """
    Load a trained agent and a matching environment from disk.

    Known model names include: flexible_load_first, overnight,
    larger_margin_cost, discount_06, flex50.

    :param model_name: base name of the saved model and its pickled
        parameter file, both located in MODEL_PATH
    :param seed: seed for the ActiveEnv the model is attached to
    :return: (model, env) tuple, with env already registered on model
    """
    model_path = os.path.join(MODEL_PATH, model_name)
    param_path = os.path.join(MODEL_PATH, model_name + '_params.p')
    try:
        model = DDPG.load(model_path)
    except Exception:
        # Not every saved agent is a DDPG model; fall back to PPO1.
        # Note: was a bare `except:`, which would also swallow
        # KeyboardInterrupt/SystemExit.
        model = PPO1.load(model_path)
    env = ActiveEnv(seed=seed)
    with open(param_path, 'rb') as f:
        # NOTE(review): pickle.load on a trusted local file only.
        params = pickle.load(f)
    env.set_parameters(params)
    model.set_env(env)
    return model, env
def test_reset_seed(self):
    """
    Two environments seeded identically must stay in sync even after an
    episode resets.
    """
    first = ActiveEnv(seed=7)
    second = ActiveEnv(seed=7)
    first.set_parameters({'episode_length': 3, 'forecast_horizon': 1})
    second.set_parameters({'episode_length': 3, 'forecast_horizon': 1})
    # Four steps with episode_length 3 forces at least one reset.
    for _ in range(4):
        action = first.action_space.sample()
        first.step(action)
        second.step(action)
        assert norm(first.powergrid.load['p_mw']
                    - second.powergrid.load['p_mw']) < 10e-5
def test_set_params(self):
    """
    Scaling solar_scale and demand_scale via set_parameters must scale
    the corresponding forecasts by exactly the same factors.
    """
    base = ActiveEnv(seed=3)
    base_solar_scale = base.params['solar_scale']
    base_demand_scale = base.params['demand_scale']
    scaled = ActiveEnv(seed=3)
    scaled.set_parameters({
        'solar_scale': base_solar_scale * 2,
        'demand_scale': base_demand_scale * 3
    })
    assert norm(base.get_solar_forecast() * 2
                - scaled.get_solar_forecast()) < 10e-7
    assert norm(base.get_demand_forecast()[0] * 3
                - scaled.get_demand_forecast()[0]) < 10e-7
def test_action(self):
    """
    _take_action must scale the sampled action into the grid loads, and
    a follow-up action must be overridden so that committed loads cancel
    the previous adjustment (force_commitments=True).
    """
    flex = 0.1
    env = ActiveEnv(force_commitments=True)
    env.set_parameters({'flexibility': flex, 'demand_std': 0})
    env.set_demand_and_solar()
    demand = copy.copy(env.powergrid.load['p_mw'].values)

    # Fixed: removed dead assignment `action1 = np.ones_like(...)`,
    # which was immediately overwritten by the sample below.
    action1 = env.action_space.sample()
    scaled_action1 = flex * action1 * env.powergrid.load['p_mw']
    env._take_action(action1)
    assert norm(env.powergrid.load['p_mw'].values
                - (demand + scaled_action1)) < 10e-4

    action2 = env.action_space.sample()
    env._take_action(action2)
    # action2 should be modified to cancel the effect of action1
    assert norm(env.last_action + scaled_action1) < 10e-4
    assert norm(env.powergrid.load['p_mw'].values - demand) < 10e-4
Training the reinforcement agent using stable baselines
"""
__author__ = 'Vegard Solberg'
__email__ = '*****@*****.**'

from active_env.envs.active_network_env import ActiveEnv
from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv
from stable_baselines.ddpg.policies import LnMlpPolicy
from stable_baselines.ddpg.noise import OrnsteinUhlenbeckActionNoise
from stable_baselines import DDPG
from stable_baselines.ddpg.noise import AdaptiveParamNoiseSpec
import numpy as np

# Build the environment with the state/reward configuration used for
# training, then wrap it for stable-baselines' vectorized API.
powerenv = ActiveEnv()
powerenv.set_parameters({
    'state_space': ['sun', 'demand', 'imbalance'],
    'reward_terms': ['voltage', 'current', 'imbalance']
})
powerenv = DummyVecEnv([lambda: powerenv])

# Exploration noise: OU process on actions plus adaptive parameter noise.
action_mean = np.zeros(powerenv.action_space.shape)
action_sigma = 0.3 * np.ones(powerenv.action_space.shape)
action_noise = OrnsteinUhlenbeckActionNoise(mean=action_mean,
                                            sigma=action_sigma)
param_noise = AdaptiveParamNoiseSpec(initial_stddev=0.2,
                                     desired_action_stddev=0.01)

t_steps = 800000
# NOTE(review): hard-coded absolute Windows path; only works on the
# author's machine.
logdir = 'C:\\Users\\vegar\\Dropbox\\Master\\logs'
# (Truncated in this chunk: the DDPG constructor call continues beyond
# the visible source.)
powermodel = DDPG(
    LnMlpPolicy,
from keras.layers.normalization import BatchNormalization
from rl.agents import DDPGAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from active_env.envs.twobus_env import TwoBusEnv, PowerEnvOld, PowerEnvOldNormalized
from active_env.envs.active_network_env import ActiveEnv

ENV_NAME = 'Pendulum-v0'

# Get the environment and extract the number of actions.
#powergrid = gym.make(ENV_NAME)
env = ActiveEnv()
env.set_parameters({
    'state_space': ['sun', 'demand', 'imbalance'],
    'voltage_weight': 10,
    'current_weight': 0.1,
    'reward_terms': ['voltage', 'current', 'imbalance']
})
#env = PowerEnvOldNormalized()
#env = PowerEnvStep()

# Seed both numpy's global RNG and the environment for reproducibility.
np.random.seed(123)
env.seed(123)
# keras-rl's DDPGAgent expects a flat (1-D) action space.
assert len(env.action_space.shape) == 1
nb_actions = env.action_space.shape[0]

# Next, we build a very simple model.
# NOTE(review): Sequential/Flatten/Dense/np are imported outside this
# chunk; the actor definition also continues beyond the visible source.
actor = Sequential()
actor.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
actor.add(Dense(16))