def init_fresh(hyp):
    """Build a brand-new run state (env, buffer, nets, writers, optimizers) from hyperparameters."""
    counters = defaultdict(int)
    paths = utils.get_paths(hyp)
    transition_logger = utils.make_logger("transitions.data", paths["run"])
    env = registry.make(**hyp["env"], logger=transition_logger)
    buffer = memory.make(env, hyp)
    nets = init_nets(env, hyp)
    writers = init_writers(counters, paths)
    optimizers = init_optimizers(hyp)

    #  the target entropy lives in hyp, not alongside the networks
    hyp["target-entropy"] = nets.pop("target_entropy")

    return {
        "hyp": hyp,
        "paths": paths,
        "counters": counters,
        "env": env,
        "buffer": buffer,
        "nets": nets,
        "writers": writers,
        "optimizers": optimizers,
        "transition_logger": transition_logger,
        "rewards": defaultdict(list),
    }
def init_fresh(hyp):
    """Create every component of a fresh training run and return them keyed in one dict."""
    counters = defaultdict(int)
    paths = utils.get_paths(hyp)
    logger = utils.make_logger('transitions.data', paths['run'])
    env = registry.make(**hyp['env'], logger=logger)
    buffer = memory.make(env, hyp)
    nets = init_nets(env, hyp)
    writers = init_writers(counters, paths)
    optimizers = init_optimizers(hyp)

    #  move the target entropy out of the nets dict into the hyperparameters
    hyp['target-entropy'] = nets.pop('target_entropy')

    state = {
        'hyp': hyp,
        'paths': paths,
        'counters': counters,
        'env': env,
        'buffer': buffer,
        'nets': nets,
        'writers': writers,
        'optimizers': optimizers,
        'transition_logger': logger,
        'rewards': defaultdict(list),
    }
    return state
def test_one_battery_charging(cfg, actions, expected_charges):
    """Step a single battery through `actions` and check its charge trajectory."""
    env = make('battery', **cfg, n_batteries=1)
    env.reset()

    results = defaultdict(list)
    for action in actions:
        #  single battery -> (1, 1) action array
        next_obs, reward, done, info = env.step(np.array(action).reshape(1, 1))
        results['charge'].append(info['charge'])

    #  the action sequence should span exactly one episode
    assert done

    charges = np.squeeze(np.array(results['charge']))
    np.testing.assert_array_almost_equal(charges, expected_charges)
def test_one_battery_charging(cfg, actions, expected_losses):
    """Step a single battery through `actions` and check per-step power losses.

    NOTE(review): this redefines `test_one_battery_charging` — if it lives in
    the same module as the charge-checking variant, pytest will only collect
    this one; consider renaming (e.g. test_one_battery_losses).
    """
    env = make('battery', **cfg, n_batteries=1)
    env.reset()

    results = defaultdict(list)
    for action in actions:
        #  single battery -> (1, 1) action array
        action = np.array(action).reshape(1, 1)
        next_obs, reward, done, info = env.step(action)
        results['losses'].append(info['losses_power'])
        results['gross_power'].append(info['gross_power'])

    #  the action sequence should span exactly one episode
    assert done

    #  removed leftover debug code (pandas import + DataFrame print)
    losses = np.squeeze(np.array(results['losses']))
    np.testing.assert_array_almost_equal(losses, expected_losses)
def test_make_random_dataset_one_battery():
    """A random dataset for one battery has (n, 1, n_features) shaped features."""
    env = make(
        'battery',
        n_batteries=1,
        dataset={'name': 'random-dataset', 'n': 10000, 'n_features': 3},
    )
    data = env.dataset.dataset

    #  both arrays are timestep-first
    assert data['prices'].shape[0] == 10000
    assert data['features'].shape[0] == 10000
    #  prices are rank 3 as well
    assert len(data['prices'].shape) == 3
    #  one battery, three features
    assert data['features'].shape[1] == 1
    assert data['features'].shape[2] == 3
def test_make_random_dataset_many_battery():
    """A random dataset for many batteries has (n, n_batteries, n_features) features."""
    env = make(
        'battery',
        n_batteries=4,
        dataset={'name': 'random-dataset', 'n': 1000, 'n_features': 6},
    )
    data = env.dataset.dataset

    #  removed leftover debug print of the shapes
    assert data['prices'].shape[0] == 1000

    #  (timestep, battery, feature) - the original comment had the last
    #  two axes swapped; the assertions below show axis 1 is batteries (4)
    #  and axis 2 is features (6)
    assert data['features'].shape[0] == 1000
    assert data['features'].shape[1] == 4
    assert data['features'].shape[2] == 6
def test_many_battery_step():
    """Step several batteries in parallel, checking each battery's charge trajectory.

    Each entry of `test_cases` supplies (cfg, actions, expected_charges) for one
    battery; the per-battery scalar configs are zipped into per-key lists and the
    action/charge sequences are batched battery-first then transposed to
    timestep-first.
    """
    cfgs = defaultdict(list)
    actions, charges = [], []
    for test_case in test_cases:
        #  collect each battery's scalar config values into per-key lists
        for k, v in test_case[0].items():
            cfgs[k].append(v)
        actions.append(test_case[1])
        charges.append(test_case[2])

    #  episode length is shared across batteries - a scalar, not a list
    cfgs['episode_length'] = 3

    #  actions = (3, 3)
    #  env.step consumes actions timestep first -> transpose (battery, time)
    actions = np.array(actions).T
    expected_charges = np.array(charges).T

    env = make(
        'battery',
        n_batteries=len(test_cases),
        **cfgs,
        dataset={'name': 'random-dataset', 'n_features': 10},
    )

    #  test 1 - per-battery config ends up on the env
    np.testing.assert_array_equal(cfgs['power'], env.power[0, 0])
    assert env.power.shape == (len(test_cases), 1)

    obs = env.reset()
    results = defaultdict(list)
    for action in actions:
        action = np.array(action).reshape(len(test_cases), 1)
        #  removed leftover debug print of env.charge
        next_obs, reward, done, info = env.step(action)
        results['charge'].append(info['charge'])
        #  1 for the charge variable added onto our 10 features
        assert next_obs.shape == (len(test_cases), 10 + 1)

    assert done.all()
    np.testing.assert_array_almost_equal(
        np.squeeze(results['charge']), np.squeeze(expected_charges)
    )
def __init__(
    self,
    n_batteries=2,
    power=2.0,
    capacity=4.0,
    efficiency=0.9,
    initial_charge=0.0,
    episode_length=288,
    dataset=None,
    logger=None,
):
    """Battery environment.

    Args:
        n_batteries: number of batteries simulated in parallel.
        power: per-battery power rating (scalar or per-battery sequence).
        capacity: per-battery energy capacity.
        efficiency: per-battery round-trip efficiency.
        initial_charge: per-battery starting charge.
        episode_length: steps per episode (cast to int).
        dataset: dict config passed to registry.make, or an already-built
            dataset object.  Defaults to {'name': 'random-dataset'}.
        logger: passed through to the dataset when it is built here.
    """
    #  fix: the default was a mutable dict literal shared across all calls;
    #  use a None sentinel and build the default inside the body instead
    if dataset is None:
        dataset = {'name': 'random-dataset'}

    self.n_batteries = n_batteries
    #  set_battery_config broadcasts scalar configs across batteries
    self.power = set_battery_config(power, n_batteries)
    self.capacity = set_battery_config(capacity, n_batteries)
    self.efficiency = set_battery_config(efficiency, n_batteries)
    self.initial_charge = set_battery_config(initial_charge, n_batteries)
    self.episode_length = int(episode_length)

    #  a dict is a config to build from; anything else is a ready-made dataset
    if isinstance(dataset, dict):
        self.dataset = registry.make(**dataset, logger=logger, n_batteries=n_batteries)
    else:
        self.dataset = dataset

    #  +1 additional feature for the battery charge appended to the observation
    self.observation_space = BatteryObservationSpace(self.dataset, additional_features=1)
    self.action_space = BatteryActionSpace(n_batteries)

    #  schema of one transition - (name, shape, dtype)
    self.elements = (
        ('observation', self.observation_space.shape, 'float32'),
        ('action', self.action_space.shape, 'float32'),
        ('reward', (1, ), 'float32'),
        ('next_observation', self.observation_space.shape, 'float32'),
        ('done', (1, ), 'bool'),
    )
    self.Transition = namedtuple('Transition', [el[0] for el in self.elements])
def test_battery_init():
    """Smoke test - a battery env can be constructed with a random dataset."""
    make('battery', dataset={'name': 'random-dataset', 'n_features': 16})
def load_checkpoint(path, full=True):
    """Load a saved checkpoint from disk.

    Full mode loads everything (env, nets, optimizers, buffer); otherwise only
    rewards & counters are loaded - a way to quickly evaluate checkpoints
    without loading what we don't need.

    Args:
        path: checkpoint directory.
        full: when True also restore env, networks, optimizers and buffer.

    Returns:
        dict with 'path', 'hyp', 'rewards', 'counters' and, in full mode,
        'env', 'nets', 'optimizers', 'buffer'.
    """
    path = Path(path)
    hyp = load_hyp(path)

    #  'time' is tracked alongside rewards on disk but is not a reward series
    rewards = json_util.load(path / 'rewards.json')
    rewards.pop('time')
    rewards = defaultdict(list, rewards)
    counters = defaultdict(int, json_util.load(path / 'counters.json'))

    results = {
        'path': path,
        'hyp': hyp,
        'rewards': rewards,
        'counters': counters,
    }

    if full:
        #  catch a weird error when we load old buffers
        try:
            buffer = memory.load(path / 'buffer.pkl')
        except ModuleNotFoundError:
            print('failed to load buffer due to ModuleNotFoundError')
            buffer = None

        env = registry.make(**hyp['env'])
        nets = init_nets(env, hyp)

        #  awkward - init_nets also returns the target entropy, which is
        #  not a network and has no weights to load
        nets.pop('target_entropy')

        for name, net in nets.items():
            #  awkward - alpha is a single tf variable, not a keras model,
            #  so it is restored separately below from a .npy file
            if 'alpha' not in name:
                net.load_weights(path / f'{name}.h5')
                print(f'loaded {name}')

        log_alpha = nets['alpha']
        saved_log_alpha = np.load(path / 'alpha.npy')
        log_alpha.assign(saved_log_alpha)

        optimizers = init_optimizers(hyp)
        for name, opt in optimizers.items():
            opt_path = path / f'{name}.pkl'
            if opt_path.exists():
                #  optimizer slots only exist after a first apply_gradients;
                #  apply zero gradients so set_weights has slots to fill
                #  https://stackoverflow.com/questions/49503748/save-and-load-model-optimizer-state
                model = nets[name]

                #  single var
                if 'alpha' in name:
                    wts = [model, ]
                else:
                    wts = model.trainable_variables

                zero_grads = [tf.zeros_like(w) for w in wts]
                opt.apply_gradients(zip(zero_grads, wts))

                with opt_path.open('rb') as fi:
                    opt.set_weights(pickle.load(fi))

        results['env'] = env
        results['nets'] = nets
        results['optimizers'] = optimizers
        results['buffer'] = buffer

    return results
def __init__(
    self,
    n_batteries=2,
    power=2.0,
    capacity=4.0,
    efficiency=0.9,
    initial_charge=0.0,
    episode_length=288,
    dataset=None,
    logger=None,
    first_reset='train',
):
    """Battery environment (attention variant - emits observation masks).

    Args:
        n_batteries: number of batteries simulated in parallel.
        power: per-battery power rating in kW.
        capacity: per-battery energy capacity in kWh.
        efficiency: per-battery round-trip efficiency (fraction).
        initial_charge: fraction of capacity in [0, 1], or the string
            "random" for a random starting charge.
        episode_length: steps per episode (cast to int).
        dataset: dict config passed to registry.make, or an already-built
            dataset object.  Defaults to {"name": "random-dataset"}.
        logger: passed through to the dataset when it is built here.
        first_reset: mode passed to the initial reset() call.
    """
    #  fix: the default was a mutable dict literal shared across all calls;
    #  use a None sentinel and build the default inside the body instead
    if dataset is None:
        dataset = {"name": "random-dataset"}

    self.n_batteries = n_batteries

    #  2 = half hourly, 6 = 5 min
    self.timestep = 2

    #  kW
    self.power = set_battery_config(power, n_batteries)
    #  kWh
    self.capacity = set_battery_config(capacity, n_batteries)
    #  %
    self.efficiency = set_battery_config(efficiency, n_batteries)

    if isinstance(initial_charge, str) and initial_charge == "random":
        self.initial_charge = initial_charge
    else:
        #  kWh - initial_charge is a fraction of capacity, clipped to [0, 1]
        initial_charge = np.clip(initial_charge, 0, 1.0)
        self.initial_charge = set_battery_config(initial_charge * capacity, n_batteries)

    self.episode_length = int(episode_length)

    #  a dict is a config to build from; anything else is a ready-made dataset
    if isinstance(dataset, dict):
        self.dataset = registry.make(**dataset, logger=logger, n_batteries=n_batteries)
    else:
        assert dataset.n_batteries == self.n_batteries
        self.dataset = dataset

    self.reset(first_reset)

    #  +1 additional feature for the battery charge appended to the observation
    self.observation_space = BatteryObservationSpace(self.dataset, additional_features=1)
    self.action_space = BatteryActionSpace(n_batteries)

    mask_shape = self.observation_space.get_mask_shape()

    #  schema of one transition - (name, shape, dtype)
    self.elements = (
        ("observation", self.observation_space.shape, "float32"),
        ("action", self.action_space.shape, "float32"),
        ("reward", (1, ), "float32"),
        ("next_observation", self.observation_space.shape, "float32"),
        ("done", (1, ), "bool"),
        #  attention specific - TODO toggle these out for non attention
        ("observation_mask", mask_shape, "float32"),
        ("next_observation_mask", mask_shape, "float32"),
    )
    self.Transition = namedtuple("Transition", [el[0] for el in self.elements])
    #  NOTE(review): these two statements are the tail of a function defined
    #  above this chunk (presumably get_best_checkpoint) - indentation
    #  reconstructed, confirm against the full file
    print(f'\nfound best checkpoint at {path}')
    return best


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('run')
    args = parser.parse_args()
    run_path = args.run

    checkpoints = checkpoint.load(run_path)
    #  NOTE(review): this rebinding shadows the `checkpoint` module used on
    #  the line above - works because the module is not used again, but a
    #  different local name would be clearer
    checkpoint = get_best_checkpoint(checkpoints)
    print(checkpoint.keys())

    hyp = checkpoint['hyp']
    env = registry.make('lunar')
    actor = checkpoint['nets']['actor']

    #  roll out one greedy episode, collecting rendered frames
    env.reset()
    obs = env.reset().reshape(1, -1)
    done = False
    episode_reward = 0
    frames = []
    while not done:
        #  actor returns a 3-tuple; the deterministic action is the last element
        _, _, action = actor(obs)
        frames.append(env.env.render('rgb_array'))
        #  NOTE(review): other envs in this codebase return a 4-tuple
        #  (obs, reward, done, info) from step - confirm this 3-tuple unpack
        #  matches the 'lunar' env's interface
        next_obs, reward, done = env.step(np.array(action))
        episode_reward += reward
        obs = next_obs