Example #1
def setup_experiment(sess, agent_config, env_config, paths, seed=None):
    """

    args
        sess (tf.Session)
        agent_config (dict)
        env_config (dict)
        paths (dict)
        seed (int)
    """

    env = energy_py.make_env(**env_config)
    save_args(env_config, path=paths['env_args'])

    if seed:
        logger.info('random seed is {}'.format(seed))
        env.seed(seed)

    agent_config['env'] = env
    agent_config['sess'] = sess
    agent_config['act_path'] = paths['tb_act']
    agent_config['learn_path'] = paths['tb_learn']

    agent = energy_py.make_agent(**agent_config)
    save_args(agent_config, path=paths['agent_args'])

    if hasattr(agent, 'acting_writer'):
        agent.acting_writer.add_graph(sess.graph)

    return agent, env
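
A minimal usage sketch for setup_experiment, assuming the energy_py API shown above; the agent_config, env_config and paths values below are illustrative assumptions, not taken from the source.

import tensorflow as tf

#  hypothetical configs and output paths, for illustration only
agent_config = {'agent_id': 'dqn', 'total_steps': 10000}
env_config = {'env_id': 'battery', 'dataset': 'example'}
paths = {
    'env_args': './results/env_args.txt',
    'agent_args': './results/agent_args.txt',
    'tb_act': './results/tb_act',
    'tb_learn': './results/tb_learn',
}

with tf.Session() as sess:
    agent, env = setup_experiment(
        sess, agent_config, env_config, paths, seed=42
    )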
Example #2
def test_lower_setpoint():
    """ testing reducing the setpoint - increasing cooling generation """

    #  long release time so that we release it all during
    #  the setpoint lowering
    env = energy_py.make_env('flex', capacity=2.0, release_time=10)

    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)
        if step >= 5 and step < 10:
            act = np.array(1)

        if step == 11:
            act = np.array(2)

        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)

    expt = out.iloc[:11, :]

    #  check that we discharge everything that we stored
    np.testing.assert_equal(out.loc[:, 'stored'].sum(),
                            out.loc[:, 'discharged'].sum())

    #  check we only discharge when we reduce the setpoint
    np.testing.assert_equal(0, out.loc[:10, 'discharged'].sum())

    np.testing.assert_equal(out.loc[:, 'stored'].sum(),
                            out.loc[11:11, 'discharged'].sum())
Example #3
def test_load_pickle_memory():
    env = energy_py.make_env('cartpole-v0')

    mem = energy_py.make_memory(memory_id='array', env=env)

    state = env.observation_space.sample()
    action = env.action_space.sample()
    reward = 1
    next_state = env.observation_space.sample()
    done = False

    experience = state, action, reward, next_state, done

    mem.remember(*experience)

    mem.save('./results/test_mem.pkl')

    new_mem = energy_py.make_memory(load_path='./results/test_mem.pkl')

    saved_exp = new_mem[0]

    for exp, saved in zip(experience, saved_exp):

        exp, saved = np.array(exp), np.array(saved)

        np.testing.assert_equal(exp, saved)
Example #4
def test_release_when_full():
    #  long release time so that energy is only released
    #  once the storage is full
    capacity = 0.5
    env = energy_py.make_env('flex', capacity=capacity, release_time=100)

    obs = env.reset()
    done = False
    step = 0

    while not done:
        #  always store
        act = np.array(1)

        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)

    expt = out.iloc[:, :]

    #  calculate when we should have discharged

    cumulative_demand = []
    for idx, row in expt.iterrows():

        cumulative_demand.append(row.loc['site_demand'])
        if sum(cumulative_demand) >= capacity:
            np.testing.assert_almost_equal(row.loc['discharged'],
                                           sum(cumulative_demand))
            cumulative_demand = []
Example #5
def test_raise_sp():
    """ testing raising the setpoint - reducing cooling generation """
    env = energy_py.make_env('flex', capacity=2.0, release_time=4)

    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)
        if step >= 3 and step < 7:
            act = np.array(1)

        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)

    expt = out.iloc[:12, :]

    #  check that we charge and discharge equal amounts of energy
    np.testing.assert_equal(expt['stored'].sum(), expt['discharged'].sum())

    #  check the timing of the discharge
    #  making an assumption the capacity is big enough
    #  could fail if I change the example dataset
    #  maybe better to use a test dataset TODO
    np.testing.assert_array_equal(expt.loc[3:7, 'stored'],
                                  expt.loc[7:11, 'discharged'])

    np.testing.assert_array_equal(expt.loc[3, 'stored'],
                                  expt.loc[7, 'discharged'])
Example #6
def test_no_op():
    config = default_config
    config['initial_charge'] = 0.5
    config['capacity'] = 4.0

    env = energy_py.make_env(**config)
    obs = env.reset()

    next_obs, rew, d, i = env.step(0)

    charge = env.get_state_variable('C_charge_level [MWh]')

    #  initial_charge is a fraction of capacity -> 0.5 * 4.0 MWh
    expected_charge = 2.0

    assert charge == expected_charge
Example #7
def setup_agent(sess, double_q=False):
    """
    Sets up an agent & fills memory

    args
        sess (tf.Session)

    returns
        agent (energy_py DQN agent)
        env (energy_py Battery environment)
    """

    env = energy_py.make_env(
        '2048',
        observation_dims='2D'
    )

    #  use high learning rate to get weight changes
    agent = energy_py.make_agent(
        agent_id='dqn',
        sess=sess,
        env=env,
        total_steps=10,
        discount=0.9,
        memory_type='deque',
        learning_rate=1.0,
        double_q=double_q,
        update_target_net=100,

        network='conv',
        filters=(8, 16),
        kernels=(2, 2),
        strides=(1, 1)

    )

    for step in range(48):
        obs = env.observation_space.sample()
        action = env.action_space.sample()
        reward = random.random() * 10
        next_obs = env.observation_space.sample()
        done = random.choice([True, False])
        agent.remember(obs, action, reward, next_obs, done)

    batch = agent.memory.get_batch(agent.batch_size)

    return agent, batch, env
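
A hedged sketch of how setup_agent might be driven in a test; the variable initialisation and the single learn() call are assumptions based only on the agent API used in the other examples.

import tensorflow as tf

with tf.Session() as sess:
    agent, batch, env = setup_agent(sess, double_q=True)
    #  assumed: TF variables need initialising before learning
    sess.run(tf.global_variables_initializer())
    #  one learning step using the randomly filled memory
    agent.learn()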
Example #8
def test_no_op():
    env = energy_py.make_env('flex')

    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)
        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)

    check_energy_balance(info)

    np.testing.assert_equal(info['reward'].sum(), 0)
Example #9
def test_decrease_setpoint():
    """ tests the precooling - but sets release time and capacity high """
    env = energy_py.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=100,  #  large so the supply capacity doesn't bind
        release_time=100,  #  large so the release time doesn't bind
        supply_power=0.05,
        episode_length=10,
        episode_sample='random'
    )

    #  seed before reset so the randomly sampled episode is reproducible
    env.seed(42)
    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)

        if (step >= 2) and (step <= 5):
            act = np.array(2)
            print(step, act)

        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)

    sub = info.loc[:, ['site_demand', 'site_consumption', 'setpoint',
                       'stored_supply']]

    print(sub.head(15))

    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.full(5-2, env.supply_power)
    )

    #  5 minute steps -> divide by 12 to convert MW to MWh
    np.testing.assert_almost_equal(
        env.supply_power * 4 / 12 - np.sum(info.loc[:, 'site_demand'].values[2:6]) / 12,
        info.loc[:, 'stored_supply'][5]
    )
Example #10
def test_increase_setpoint():
    env = energy_py.make_env(
        'flex',
        capacity=4.0,
        supply_capacity=0.5,
        release_time=3,
        supply_power=0.05,
        episode_length=10,
        episode_sample='random'
    )

    #  seed before reset so the randomly sampled episode is reproducible
    env.seed(42)
    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)

        if (step >= 2) and (step <= 4):
            act = np.array(1)
            print(step, act)

        next_obs, r, done, i = env.step(act)
        step += 1

    info = pd.DataFrame().from_dict(i)

    sub = info.loc[:, ['site_demand', 'site_consumption', 'setpoint']]

    np.testing.assert_array_equal(
        info.loc[:, 'site_consumption'].values[2:5],
        np.zeros(5-2)
    )

    np.testing.assert_array_almost_equal(
        info.loc[:, 'site_demand'].values[5:5+3] +
        info.loc[:, 'site_demand'].values[2:5],
        info.loc[:, 'site_consumption'].values[5:5+3]
    )
Example #11
def test_no_op():
    env = energy_py.make_env('flex')

    obs = env.reset()
    done = False
    step = 0

    while not done:
        act = np.array(0)
        next_obs, r, done, i = env.step(act)
        step += 1

    out = pd.DataFrame().from_dict(i)

    #  this little bit of computation should be in the flex env

    out['base_costs'] = out['site_demand'] * out['electricity_price']
    out['opt_costs'] = out['site_electricity_consumption'] * out[
        'electricity_price']
    out['delta'] = out['base_costs'] - out['opt_costs']

    np.testing.assert_equal(out['delta'].sum(), 0)
Example #12
import energy_py

default_config = {
    'env_id': 'battery',
    'dataset': 'example',
    'initial_charge': 0,
    'round_trip_eff': 0.9,
    'episode_sample': 'full'
}

env = energy_py.make_env(**default_config)
obs = env.reset()


def test_charge():

    next_obs, rew, d, i = env.step(1.0)

    charge = env.get_state_variable('C_charge_level [MWh]')

    #  1 MW for one 5 minute step at 90% round trip efficiency
    expected_charge = 0.9 * 1.0 / 12

    assert charge == expected_charge


def test_discharge():
    config = default_config
    config['initial_charge'] = 1.0
    config['capacity'] = 4.0

    env = energy_py.make_env(**config)
Example #13
import tensorflow as tf
import energy_py

with tf.Session() as sess:
    env = energy_py.make_env(env_id='battery',
                             episode_length=288,
                             dataset='example')

    agent = energy_py.make_agent(sess=sess,
                                 agent_id='dqn',
                                 env=env,
                                 total_steps=1000000)

    observation = env.reset()

    done = False
    while not done:
        action = agent.act(observation)
        next_observation, reward, done, info = env.step(action)
        training_info = agent.learn()
        observation = next_observation
Example #14
"""
test - check that stepping the environment gives the correct observation,
by running for n steps and comparing with the indexing method
"""
import energy_py
import numpy as np

env = energy_py.make_env('flex')


def get_state_actions(state):

    actions = env.action_space.discretize(3)

    return [
        np.concatenate([state.reshape(-1),
                        action.reshape(-1)]) for action in actions
    ]


step = 4
state = env.observation_space.data.iloc[step, :]

state_actions = get_state_actions(state)

actions = env.action_space.discrete_spaces

# def get_viable_transitions(step, state, actions):
Example #15
def experiment(agent_config, env_config, total_steps, paths, seed=None):
    """
    Run an experiment.  Episodes are run until total_steps is reached.

    args
        agent_config (dict)
        env_config (dict)
        total_steps (int)
        paths (dict)
        seed (int)

    Agent and environment are created from config dictionaries.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:

        #  optionally set random seeds
        logger.info('random seed is {}'.format(seed))
        if seed:
            seed = int(seed)
            random.seed(seed)
            tf.set_random_seed(seed)
            np.random.seed(seed)

        env = energy_py.make_env(**env_config)
        save_args(env_config, path=paths['env_args'])

        #  add stuff into the agent config dict
        agent_config['env'] = env
        agent_config['env_repr'] = repr(env)
        agent_config['sess'] = sess
        agent_config['act_path'] = paths['tb_act']
        agent_config['learn_path'] = paths['tb_learn']

        #  init agent and save args
        agent = energy_py.make_agent(**agent_config)
        if hasattr(agent, 'acting_writer'):
            agent.acting_writer.add_graph(sess.graph)
        save_args(agent_config, path=paths['agent_args'])

        #  runner helps to manage our experiment
        runner = Runner(sess, paths, total_steps)

        #  outer while loop runs through multiple episodes
        step, episode = 0, 0
        while step < int(total_steps):
            episode += 1
            done = False
            observation = env.reset()

            #  inner while loop runs through a single episode
            while not done:
                step += 1
                #  select an action
                action = agent.act(observation)
                #  take one step through the environment
                next_observation, reward, done, info = env.step(action)
                #  store the experience
                agent.remember(observation, action, reward, next_observation,
                               done)
                runner.record_step(reward)
                #  moving to the next time step
                observation = next_observation

                #  fill the memory up halfway before we learn
                #  TODO the agent should decide what to do internally here
                if step > int(agent.memory.size * 0.5):
                    train_info = agent.learn()

            runner.record_episode(env_info=info)

            save_env_info(env, info, len(runner.episode_rewards),
                          paths['env_histories'])
Example #16
    def _act(self, observation):
        """
        Agent selects action randomly

        returns
             action (np.array)
        """
        return self.action_space.sample()


if __name__ == '__main__':
    import energy_py

    env = energy_py.make_env(
        'Flex-v1',
        flex_size=1,
        max_flex_time=4,
        relax_time=0,
        dataset='tempus')

    a = energy_py.make_agent('naive_flex', env=env, hours=(6, 10, 15, 19))

    o = env.reset()
    done = False
    while not done:
        action = a.act(o)

        o, r, done, i = env.step(action)

        print(action)