示例#1
0
# RL CONTROLLER
#Instantiating the control agent(s)
agents = Agent(env, building_info, observations_spaces, actions_spaces)

# Select many episodes for training. In the final run we will set this value to 1 (the buildings run for one year)
episodes = 10

k, c = 0, 0
cost, cum_reward = {}, {}
start = time.time()
# The number of episodes can be replaces by a stopping criterion (i.e. convergence of the average reward)
for e in range(episodes):
    cum_reward[e] = 0
    rewards = []
    state = env.reset()
    done = False
    while not done:
        if k % (1000) == 0:
            print('hour: ' + str(k) + ' of ' + str(8760 * episodes))

        action = agents.select_action(state)
        next_state, reward, done, _ = env.step(action)
        agents.add_to_buffer(state, action, reward, next_state, done)
        state = next_state

        cum_reward[e] += reward[0]
        rewards.append(reward)
        k += 1

    #ddpg
示例#2
0
             policy_kwargs=policy_kwargs,
             env=env,
             batch_size=1024,
             buffer_size=int(5e5),
             verbose=0,
             param_noise=param_noise,
             action_noise=action_noise,
             tensorboard_log=parent_dir + "tensorboard/",
             n_cpu_tf_sess=multiprocessing.cpu_count())

model.learn(total_timesteps=interval * icount,
            log_interval=interval,
            tb_log_name="DDPG_{}".format(time.strftime("%Y%m%d")),
            callback=callbackList)

obs = env.reset()
dones = False
counter = []
while dones == False:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    counter.append(rewards)

env.close()

print("\nFinal costs:")
pp.pprint(env.cost())

# Plot the reward graph
if useBestCallback:
    plot_results([log_dir], interval * icount, results_plotter.X_TIMESTEPS,
示例#3
0
def run(config):
    data_folder = Path(config.data_path)
    building_attributes = data_folder / 'building_attributes.json'
    solar_profile = data_folder / 'solar_generation_1kW.csv'
    building_state_actions = 'buildings_state_action_space.json'
    # building_ids = ["Building_" + str(i) for i in range(1, config.num_buildings + 1)]
    config.num_buildings = 6

    # customized log directory
    hidden = config.hidden_dim
    lr = config.lr
    tau = config.tau
    gamma = config.gamma
    batch_size = config.batch_size
    buffer_length = config.buffer_length
    to_print = lambda x: str(x)
    log_path = "log"+"_hidden"+to_print(hidden)+"_lr"+to_print(lr)+"_tau"+to_print(tau)+"_gamma"+to_print(gamma)+\
                "_batch_size"+to_print(batch_size)+"_buffer_length"+to_print(buffer_length)+"_TIME_PERIOD_1008_MAXACTION_25"+"/"

    logger = SummaryWriter(log_dir=log_path)
    # TODO fix here
    building_ids = ["Building_" + str(i)
                    for i in [1, 2, 5, 6, 7, 8]]  #[1,2,5,6,7,8]
    env = CityLearn(building_attributes,
                    solar_profile,
                    building_ids,
                    buildings_states_actions=building_state_actions,
                    cost_function=[
                        'ramping', '1-load_factor', 'peak_to_valley_ratio',
                        'peak_demand', 'net_electricity_consumption'
                    ])
    observations_spaces, actions_spaces = env.get_state_action_spaces()

    # Instantiating the control agent(s)
    if config.agent_alg == 'MADDPG':
        agents = MA_DDPG(observations_spaces,
                         actions_spaces,
                         hyper_params=vars(config))
    else:
        raise NotImplementedError

    k, c = 0, 0
    cost, cum_reward = {}, {}
    buffer = ReplayBuffer(max_steps=config.buffer_length,
                          num_agents=config.num_buildings,
                          obs_dims=[s.shape[0] for s in observations_spaces],
                          ac_dims=[a.shape[0] for a in actions_spaces])
    # TODO: store np or tensor in buffer?
    start = time.time()
    for e in range(config.n_episodes):
        cum_reward[e] = 0
        rewards = []
        state = env.reset()
        statecast = lambda x: [torch.FloatTensor(s) for s in x]
        done = False
        ss = 0
        while not done:
            if k % (40000 * 4) == 0:
                print('hour: ' + str(k) + ' of ' +
                      str(TIME_PERIOD * config.n_episodes))
            action = agents.select_action(statecast(state), explore=False)
            action = [a.detach().numpy() for a in action]
            # if batch norm:
            action = [np.squeeze(a, axis=0) for a in action]
            ss += 1
            #print("action is ", action)
            #print(action[0].shape)
            #raise NotImplementedError
            next_state, reward, done, _ = env.step(action)
            reward = reward_function(
                reward)  # See comments in reward_function.py
            #buffer_reward = [-r for r in reward]
            # agents.add_to_buffer()
            buffer.push(statecast(state), action, reward,
                        statecast(next_state), done)
            # if (len(buffer) >= config.batch_size and
            #         (e % config.steps_per_update) < config.n_rollout_threads):
            if len(buffer) >= config.batch_size:
                if USE_CUDA:
                    agents.to_train(device='gpu')
                else:
                    agents.to_train(device='cpu')
                for a_i in range(agents.n_buildings):
                    sample = buffer.sample(config.batch_size, to_gpu=USE_CUDA)
                    agents.update(sample,
                                  a_i,
                                  logger=logger,
                                  global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag='net electric consumption',
                              scalar_value=env.net_electric_consumption[-1],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag='env cost total',
                              scalar_value=env.cost()['total'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="1 load factor",
                              scalar_value=env.cost()['1-load_factor'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="peak to valley ratio",
                              scalar_value=env.cost()['peak_to_valley_ratio'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(tag="peak demand",
                              scalar_value=env.cost()['peak_demand'],
                              global_step=e * TIME_PERIOD + ss)
            logger.add_scalar(
                tag="net energy consumption",
                scalar_value=env.cost()['net_electricity_consumption'],
                global_step=e * TIME_PERIOD + ss)
            net_energy_consumption_wo_storage = env.net_electric_consumption[
                -1] + env.electric_generation[
                    -1] - env.electric_consumption_cooling_storage[
                        -1] - env.electric_consumption_dhw_storage[-1]
            logger.add_scalar(tag="net energy consumption without storage",
                              scalar_value=net_energy_consumption_wo_storage,
                              global_step=e * TIME_PERIOD + ss)

            for id, r in enumerate(reward):
                logger.add_scalar(tag="agent {} reward ".format(id),
                                  scalar_value=r,
                                  global_step=e * TIME_PERIOD + ss)

            state = next_state
            cum_reward[e] += reward[0]
            k += 1
            cur_time = time.time()
            # print("average time : {}s/iteration at iteration {}".format((cur_time - start) / (60.0 * k), k))
        cost[e] = env.cost()
        if c % 1 == 0:
            print(cost[e])
        # add env total cost and reward logger
        logger.add_scalar(tag='env cost total final',
                          scalar_value=env.cost()['total'],
                          global_step=e)
        logger.add_scalar(tag="1 load factor final",
                          scalar_value=env.cost()['1-load_factor'],
                          global_step=e)
        logger.add_scalar(tag="peak to valley ratio final",
                          scalar_value=env.cost()['peak_to_valley_ratio'],
                          global_step=e)
        logger.add_scalar(tag="peak demand final",
                          scalar_value=env.cost()['peak_demand'],
                          global_step=e)
        logger.add_scalar(
            tag="net energy consumption final",
            scalar_value=env.cost()['net_electricity_consumption'],
            global_step=e)
        net_energy_consumption_wo_storage = env.net_electric_consumption[
            -1] + env.electric_generation[
                -1] - env.electric_consumption_cooling_storage[
                    -1] - env.electric_consumption_dhw_storage[-1]
        logger.add_scalar(tag="net energy consumption without storage",
                          scalar_value=net_energy_consumption_wo_storage,
                          global_step=e)
        c += 1
        rewards.append(reward)

    end = time.time()
    print((end - start) / 60.0)
示例#4
0
# Get number of agents in Environment
num_agents = env.n_buildings
print('\nNumber of Agents: ', num_agents)

# Set the size of state observations or state size
print('\nSize of State: ', observations_spaces)
"""
#############################################
STEP 3: Run the RBC Controller to extract baseline costs

"""

# Instantiating the control agent(s)
agent = RBC_Agent(actions_spaces)

state = env.reset()
done = False
rewards_list = []
while not done:
    action = agent.select_action(state)
    next_state, reward, done, _ = env.step(action)
    state = next_state
    rewards_list.append(reward)
cost_rbc = env.get_baseline_cost()
print(cost_rbc)
"""
###################################
STEP 4: Create DDPG Agents from the Agent Class in ddpg_agent.py
A DDPG agent initialized with the following parameters.
======
building_info: Dictionary with building information as described above
示例#5
0
# Create the final dir
final_dir = parent_dir + "final/"
os.makedirs(final_dir, exist_ok=True)

# Tensorboard writer object
writer = SummaryWriter(log_dir=parent_dir + 'tensorboard/')
print("Logging to {}\n".format(parent_dir + 'tensorboard/'))

# Set seeds (TO DO: CHECK PERFORMANCE SAME FOR TWO RUNS WITH SAME SEED)
torch.manual_seed(args.seed)
np.random.seed(args.seed)
env.seed(args.seed)

# Get the Rule Base Controller baseline actions
RBC_agent = RBC_Agent(actions_spacesRBC)
state = RBC_env.reset()
state_list = []
action_list = []
doneRBC = False
while not doneRBC:
    action_RBC = RBC_agent.select_action([
        list(RBC_env.buildings.values())[0].sim_results['hour'][
            RBC_env.time_step]
    ])
    action_list.append(action_RBC)
    state_list.append(state)
    next_stateRBC, rewardsRBC, doneRBC, _ = RBC_env.step(action_RBC)
    state = next_stateRBC
RBC_action_base = np.array(action_list)
RBC_state_base = np.array(state_list)