Example #1
def init_fresh(hyp):
    counters = defaultdict(int)
    paths = utils.get_paths(hyp)
    transition_logger = utils.make_logger("transitions.data", paths["run"])

    env = registry.make(**hyp["env"], logger=transition_logger)
    buffer = memory.make(env, hyp)

    nets = init_nets(env, hyp)
    writers = init_writers(counters, paths)
    optimizers = init_optimizers(hyp)

    target_entropy = nets.pop("target_entropy")
    hyp["target-entropy"] = target_entropy

    rewards = defaultdict(list)
    return {
        "hyp": hyp,
        "paths": paths,
        "counters": counters,
        "env": env,
        "buffer": buffer,
        "nets": nets,
        "writers": writers,
        "optimizers": optimizers,
        "transition_logger": transition_logger,
        "rewards": rewards,
    }
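A minimal usage sketch, assuming this function is called from a training entry point. The hyp schema is not shown in these snippets, so every key below is hypothetical except 'env', which is unpacked into registry.make:

#  hypothetical hyperparameters - the real schema comes from the repo's configs
hyp = {
    'env': {'name': 'battery', 'n_batteries': 2},
    'run-name': 'example-run',
}
state = init_fresh(hyp)
#  init_fresh also writes the target entropy back into hyp
print(state['paths']['run'], state['hyp']['target-entropy'])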
Example #2
def test_one_battery_charging(cfg, actions, expected_charges):
    env = make('battery', **cfg, n_batteries=1)
    env.reset()

    results = defaultdict(list)
    for action in actions:
        action = np.array(action).reshape(1, 1)
        next_obs, reward, done, info = env.step(action)
        results['charge'].append(info['charge'])

    assert done
    charges = np.squeeze(np.array(results['charge']))
    np.testing.assert_array_almost_equal(charges, expected_charges)
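The expected charges imply a simple per-step energy balance. A sketch of that arithmetic, under the assumptions of half-hourly steps (a full-power action moves power / 2 kWh), perfect efficiency, and clipping at the capacity bounds; the actual environment also applies losses:

import numpy as np

def step_charge(charge, action, power=2.0, capacity=4.0, timestep=2):
    #  action in [-1, 1] scales the power rating; one interval moves power / timestep kWh
    energy = float(np.clip(action, -1, 1)) * power / timestep
    return float(np.clip(charge + energy, 0, capacity))

#  from empty: two full charges then a full discharge -> 1.0, 2.0, 1.0 kWh
charge = 0.0
for action in [1.0, 1.0, -1.0]:
    charge = step_charge(charge, action)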
Example #3
def test_one_battery_losses(cfg, actions, expected_losses):
    env = make('battery', **cfg, n_batteries=1)
    env.reset()

    results = defaultdict(list)
    for action in actions:
        action = np.array(action).reshape(1, 1)
        next_obs, reward, done, info = env.step(action)
        results['losses'].append(info['losses_power'])
        results['gross_power'].append(info['gross_power'])

    assert done
    losses = np.squeeze(np.array(results['losses']))
    np.testing.assert_array_almost_equal(losses, expected_losses)
Example #4
def test_make_random_dataset_one_battery():
    env = make('battery',
               n_batteries=1,
               dataset={
                   'name': 'random-dataset',
                   'n': 10000,
                   'n_features': 3
               })

    dataset = env.dataset.dataset

    assert dataset['prices'].shape[0] == 10000
    assert dataset['features'].shape[0] == 10000

    assert len(dataset['prices'].shape) == 3
    assert dataset['features'].shape[1] == 1
    assert dataset['features'].shape[2] == 3
Example #5
def test_make_random_dataset_many_battery():
    env = make('battery',
               n_batteries=4,
               dataset={
                   'name': 'random-dataset',
                   'n': 1000,
                   'n_features': 6,
               })

    data = env.dataset.dataset
    assert data['prices'].shape[0] == 1000

    #  features are (timestep, battery, feature)
    assert data['features'].shape[0] == 1000
    assert data['features'].shape[1] == 4
    assert data['features'].shape[2] == 6
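Taken together, the two dataset tests pin down the 'random-dataset' contract: both arrays are timestep-first, with batteries on axis 1 and features on axis 2. A sketch that satisfies those assertions (the trailing price dimensions are an assumption; only prices.shape[0] and its rank are tested):

import numpy as np

def make_random_dataset(n=1000, n_batteries=1, n_features=3):
    return {
        'prices': np.random.uniform(0, 100, size=(n, n_batteries, 1)),
        'features': np.random.normal(size=(n, n_batteries, n_features)),
    }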
Example #6
def test_many_battery_step():
    cfgs = defaultdict(list)

    actions, charges = [], []
    for test_case in test_cases:

        #  the config dict
        for k, v in test_case[0].items():
            cfgs[k].append(v)

        actions.append(test_case[1])
        charges.append(test_case[2])

    #  episode_length is a single int shared by every battery
    cfgs['episode_length'] = 3

    #  actions arrive battery-first as (3, 3); the env needs timestep first
    actions = np.array(actions).T
    expected_charges = np.array(charges).T

    env = make('battery',
               n_batteries=len(test_cases),
               **cfgs,
               dataset={
                   'name': 'random-dataset',
                   'n_features': 10
               })

    #  check the per-battery power ratings were broadcast to (n_batteries, 1)
    np.testing.assert_array_equal(cfgs['power'], env.power[:, 0])
    assert env.power.shape == (len(test_cases), 1)

    obs = env.reset()
    results = defaultdict(list)
    for action in actions:
        action = np.array(action).reshape(len(test_cases), 1)
        next_obs, reward, done, info = env.step(action)
        results['charge'].append(info['charge'])
        #  1 for the charge variable added onto our 10 features
        assert next_obs.shape == (len(test_cases), 10 + 1)

    assert done.all()
    np.testing.assert_array_almost_equal(np.squeeze(results['charge']),
                                         np.squeeze(expected_charges))
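The transpose is the easy thing to miss: configs and actions are collected battery-first from the single-battery test cases, while the vectorised env consumes one row per timestep. A toy illustration with hypothetical values:

import numpy as np

#  three test cases with three actions each: rows are batteries
battery_first = np.array([[1.0, 1.0, -1.0],
                          [0.5, -0.5, 0.0],
                          [-1.0, 1.0, 1.0]])
timestep_first = battery_first.T
#  row t now holds every battery's action at step t
assert list(timestep_first[0]) == [1.0, 0.5, -1.0]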
Example #7
    def __init__(self,
                 n_batteries=2,
                 power=2.0,
                 capacity=4.0,
                 efficiency=0.9,
                 initial_charge=0.0,
                 episode_length=288,
                 dataset={'name': 'random-dataset'},
                 logger=None):
        self.n_batteries = n_batteries

        self.power = set_battery_config(power, n_batteries)
        self.capacity = set_battery_config(capacity, n_batteries)
        self.efficiency = set_battery_config(efficiency, n_batteries)
        self.initial_charge = set_battery_config(initial_charge, n_batteries)

        self.episode_length = int(episode_length)

        if isinstance(dataset, dict):
            self.dataset = registry.make(**dataset,
                                         logger=logger,
                                         n_batteries=n_batteries)
        else:
            self.dataset = dataset

        self.observation_space = BatteryObservationSpace(self.dataset,
                                                         additional_features=1)
        self.action_space = BatteryActionSpace(n_batteries)

        self.elements = (
            ('observation', self.observation_space.shape, 'float32'),
            ('action', self.action_space.shape, 'float32'),
            ('reward', (1, ), 'float32'),
            ('next_observation', self.observation_space.shape, 'float32'),
            ('done', (1, ), 'bool'),
        )
        self.Transition = namedtuple('Transition',
                                     [el[0] for el in self.elements])
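set_battery_config is not shown in these snippets; a minimal sketch consistent with the env.power.shape == (n_batteries, 1) assertion in the many-battery test, broadcasting scalars and accepting per-battery lists (an assumption, not the actual implementation):

import numpy as np

def set_battery_config(value, n_batteries):
    #  scalar -> repeated for every battery; sequence -> one entry per battery
    if np.isscalar(value):
        value = [value] * n_batteries
    return np.array(value, dtype='float32').reshape(n_batteries, 1)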
Example #8
def test_battery_init():
    env = make('battery', dataset={'name': 'random-dataset', 'n_features': 16})
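make resolves string names like 'battery' and 'random-dataset' to environment or dataset classes. A toy version of that registry pattern, with hypothetical names; the real mapping lives in the repo's registry module:

REGISTRY = {}

def register(name):
    def decorator(cls):
        REGISTRY[name] = cls
        return cls
    return decorator

def make(name, **kwargs):
    #  look up the class registered under name and construct it
    return REGISTRY[name](**kwargs)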
Example #9
def load_checkpoint(path, full=True):
    """full mode loads everything, other mode loads only rewards & counters
    idea is to have a way to quickly evaluate checkpoints without loading what we don't need"""

    path = Path(path)

    hyp = load_hyp(path)

    rewards = json_util.load(path / 'rewards.json')
    rewards.pop('time')
    rewards = defaultdict(list, rewards)
    counters = defaultdict(int, json_util.load(path / 'counters.json'))

    results = {
        'path': path,
        'hyp': hyp,
        'rewards': rewards,
        'counters': counters,
    }

    if full:
        #  catch a weird error when we load old buffers
        try:
            buffer = memory.load(path / 'buffer.pkl')
        except ModuleNotFoundError:
            print('failed to load buffer due to ModuleNotFoundError')
            buffer = None

        env = registry.make(**hyp['env'])
        nets = init_nets(env, hyp)

        #  target_entropy is a float, not a network - drop it before loading weights
        nets.pop('target_entropy')
        for name, net in nets.items():
            #  alpha is a single tf.Variable, not a Keras model - restored below
            if 'alpha' not in name:
                net.load_weights(path / f'{name}.h5')
                print(f'loaded {name}')

        log_alpha = nets['alpha']
        saved_log_alpha = np.load(path / 'alpha.npy')
        log_alpha.assign(saved_log_alpha)

        optimizers = init_optimizers(hyp)
        for name, opt in optimizers.items():
            opt_path = path / f'{name}.pkl'

            if opt_path.exists():
                # https://stackoverflow.com/questions/49503748/save-and-load-model-optimizer-state
                model = nets[name]
                #  single var
                if 'alpha' in name:
                    wts = [model, ]
                else:
                    wts = model.trainable_variables
                zero_grads = [tf.zeros_like(w) for w in wts]
                opt.apply_gradients(zip(zero_grads, wts))

                with opt_path.open('rb') as fi:
                    opt.set_weights(pickle.load(fi))

        results['env'] = env
        results['nets'] = nets
        results['optimizers'] = optimizers
        results['buffer'] = buffer

    return results
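A usage sketch of the fast path; the checkpoint directory and reward key here are assumptions:

import numpy as np

#  hypothetical checkpoint directory
ckpt = load_checkpoint('./runs/example/checkpoints/epoch-10', full=False)
rewards = ckpt['rewards'].get('episode-reward', [])
if rewards:
    print(f'last-100 average reward: {np.mean(rewards[-100:]):.1f}')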
Example #10
    def __init__(self,
                 n_batteries=2,
                 power=2.0,
                 capacity=4.0,
                 efficiency=0.9,
                 initial_charge=0.0,
                 episode_length=288,
                 dataset={"name": "random-dataset"},
                 logger=None,
                 first_reset='train'):
        self.n_batteries = n_batteries

        #  intervals per hour: 2 = half-hourly, 12 = five minutes
        self.timestep = 2

        #  kW
        self.power = set_battery_config(power, n_batteries)
        #  kWh
        self.capacity = set_battery_config(capacity, n_batteries)
        #  round-trip efficiency, as a fraction
        self.efficiency = set_battery_config(efficiency, n_batteries)

        if isinstance(initial_charge, str) and initial_charge == "random":
            self.initial_charge = initial_charge
        else:
            #  initial_charge arrives as a fraction of capacity
            initial_charge = np.clip(initial_charge, 0, 1.0)
            #  stored in kWh
            self.initial_charge = set_battery_config(initial_charge * capacity,
                                                     n_batteries)

        self.episode_length = int(episode_length)

        if isinstance(dataset, dict):
            self.dataset = registry.make(**dataset,
                                         logger=logger,
                                         n_batteries=n_batteries)
        else:
            assert dataset.n_batteries == self.n_batteries
            self.dataset = dataset

        self.reset(first_reset)

        self.observation_space = BatteryObservationSpace(self.dataset,
                                                         additional_features=1)
        self.action_space = BatteryActionSpace(n_batteries)

        mask_shape = self.observation_space.get_mask_shape()

        self.elements = (
            ("observation", self.observation_space.shape, "float32"),
            ("action", self.action_space.shape, "float32"),
            ("reward", (1, ), "float32"),
            ("next_observation", self.observation_space.shape, "float32"),
            ("done", (1, ), "bool"),
            #  attention specific - TODO toggle these out for non attention
            ("observation_mask", mask_shape, "float32"),
            ("next_observation_mask", mask_shape, "float32"),
        )

        self.Transition = namedtuple("Transition",
                                     [el[0] for el in self.elements])
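The 'random' initial charge can't be resolved in __init__; a sketch of how reset might sample it per battery (an assumption - the actual reset isn't shown in these snippets):

import numpy as np

def sample_initial_charge(initial_charge, capacity, n_batteries):
    #  'random' -> uniform in [0, capacity] per battery; otherwise the fixed kWh array
    if isinstance(initial_charge, str):
        return np.random.uniform(0, capacity, size=(n_batteries, 1))
    return initial_charge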
Example #11
    print(f'\nfound best checkpoint at {path}')
    return best


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('run')
    args = parser.parse_args()
    run_path = args.run

    checkpoints = checkpoint.load(run_path)
    #  don't shadow the checkpoint module
    best = get_best_checkpoint(checkpoints)

    hyp = best['hyp']
    env = registry.make('lunar')

    actor = best['nets']['actor']

    obs = env.reset().reshape(1, -1)
    done = False
    episode_reward = 0

    frames = []
    while not done:
        _, _, action = actor(obs)
        frames.append(env.env.render('rgb_array'))
        next_obs, reward, done = env.step(np.array(action))
        episode_reward += reward
        obs = next_obs
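The collected frames are never written out in this fragment; one way to persist them, assuming imageio is installed (the output path is hypothetical):

    import imageio

    #  write the rendered episode to disk as a gif
    imageio.mimsave('./episode.gif', frames, fps=25)
    print(f'episode reward {float(episode_reward):.1f}')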