def test_get_observation_v3():
    '''Test the get_observation function with version 3.'''
    history = create_random_history()
    loss, grad, weights = utils.get_observation(history, version=3)
    assert isinstance(loss, float)
    assert isinstance(grad, np.ndarray)
    assert isinstance(weights, np.ndarray)
    assert len(grad) == len(weights)
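# The tests here rely on a create_random_history() fixture that is not shown
# in this section. A minimal sketch of what it might look like, assuming the
# history behaves like a mapping with 'losses', 'gradients', and 'weights'
# keys (the same keys base_step appends below); the real History type and
# this helper's shape are assumptions, not the project's actual fixture:
def create_random_history(num_steps=5, num_params=8):
    '''Hypothetical helper: random training data keyed like the env history.'''
    rng = np.random.default_rng(0)
    return {
        'losses': [float(l) for l in rng.random(num_steps)],
        'gradients': [rng.standard_normal(num_params)
                      for _ in range(num_steps)],
        'weights': [rng.standard_normal(num_params)
                    for _ in range(num_steps)],
    }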
def base_step(self, action):
    # Flatten the per-agent actions into a single vector.
    action = np.reshape([
        action[self.AGENT_FMT.format(i)].ravel()
        for i in range(self.model.size)
    ], (-1,))
    grad = self.model.get_gradient()
    # Map raw agent outputs to learning rates and apply a gradient step.
    action = utils_env.get_action_optlrs(action, self.version.action)
    self.model.set_parameters(self.model.parameters - grad * action)

    grad, loss, weights = self.model.get()
    self.history.append(losses=loss, gradients=grad, weights=weights)

    # Build the normalized observation from the raw history.
    adj_loss, adj_wght, adj_grad = utils_env.get_observation(
        self.history, self.version.observation)
    self.adjusted_history.append(weights=adj_wght,
                                 losses=adj_loss,
                                 gradients=adj_grad)
    state = self.adjusted_history.build_multistate()
    states = {
        self.AGENT_FMT.format(i):
        np.clip(np.nan_to_num(list(v)), -BOUNDS, BOUNDS) - 1
        for i, v in enumerate(state)
    }

    reward = utils_env.get_reward(loss, adj_loss, self.version.reward)
    reward = np.clip(reward, -BOUNDS, BOUNDS)

    terminal = self._terminal()
    # Terminate early on divergence and penalize the remaining steps.
    if not terminal and loss > 1e4:
        terminal = True
        reward -= (self.max_batches - self.current_step)

    final_loss = None
    if terminal:
        final_loss = self.model.get_loss()

    past_grads = self.history['gradients']
    info = {
        'loss': final_loss,
        'batch_loss': loss,
        'weights_mean': np.mean(np.abs(weights)),
        'weights_sum': np.sum(np.abs(weights)),
        'actions_mean': np.mean(action),
        'actions_std': np.std(action),
        'states_mean': np.mean(np.abs(state)),
        'states_sum': np.sum(np.abs(state)),
        'grads_mean': np.mean(self.history['gradients']),
        'grads_sum': np.sum(self.history['gradients']),
        'loss_mean': np.mean(self.history['losses']),
        'adjusted_loss': float(adj_loss),
        'adjusted_grad': np.mean(np.abs(adj_grad)),
        'grad_diff': np.mean(np.abs(past_grads[0] - past_grads[1])),
    }
    self.model.next()
    return states, reward, terminal, info
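# The state normalization above chains np.nan_to_num and np.clip before
# shifting by -1. A standalone illustration of that numpy behavior; the
# BOUNDS value here is arbitrary, for demonstration only:
def _demo_state_clipping():
    BOUNDS = 10.0
    raw = np.array([np.nan, np.inf, -np.inf, 2.5])
    # nan -> 0, +/-inf -> huge finite values, then clipped to [-BOUNDS, BOUNDS]
    clipped = np.clip(np.nan_to_num(raw), -BOUNDS, BOUNDS) - 1
    assert np.array_equal(clipped, np.array([-1.0, 9.0, -11.0, 1.5]))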
def test_get_observation_not_a_version():
    '''Test the get_observation function with an invalid version.'''
    history = create_random_history()
    with pytest.raises(RuntimeError):
        utils.get_observation(history, version=100)
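# A possible extension using standard pytest parametrization to cover several
# invalid versions at once; only version=100 is exercised by the original
# test, so the other values below are assumptions about what is invalid:
@pytest.mark.parametrize('bad_version', [100, 101, 999])
def test_get_observation_invalid_versions(bad_version):
    '''Sketch: get_observation should reject any unknown version.'''
    history = create_random_history()
    with pytest.raises(RuntimeError):
        utils.get_observation(history, version=bad_version)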
def base_step(self, action):
    # Flatten the per-agent actions into a single vector.
    action = np.reshape([
        action[MultiOptimize.AGENT_FMT.format(i)]
        for i in range(self.model.size)
    ], (-1,))

    # Map raw agent outputs to parameter updates.
    if self.version.action == 0:
        # Log-scale mapping: |a| is a base-10 exponent offset by -3.
        sign = np.sign(action)
        mag = np.abs(action) - 3
        action = sign * 10**mag
    elif self.version.action == 1:
        action = action * 1e-3
    else:
        raise RuntimeError()

    self.model.set_parameters(self.model.parameters - action)
    grad, loss, new_weights = self.model.get()
    self.history.append(losses=loss, gradients=grad, weights=new_weights)

    # Build the normalized observation from the raw history.
    adjusted = utils_env.get_observation(self.history,
                                         self.version.observation)
    adjusted_loss, adjusted_wght, adjusted_grad = adjusted

    # Record only the fields the configured history version tracks.
    if self.version.history in (0, 4):
        self.adjusted_history.append(gradients=adjusted_grad)
    elif self.version.history == 1:
        self.adjusted_history.append(losses=adjusted_loss,
                                     gradients=adjusted_grad)
    elif self.version.history in (2, 3):
        self.adjusted_history.append(weights=adjusted_wght,
                                     losses=adjusted_loss,
                                     gradients=adjusted_grad)
    elif self.version.history == 5:
        self.adjusted_history.append(losses=adjusted_loss,
                                     gradients=adjusted_grad,
                                     actions=action)
    else:
        raise RuntimeError()

    state = self.adjusted_history.build_multistate()
    states = {
        MultiOptimize.AGENT_FMT.format(i): list(v)
        for i, v in enumerate(state)
    }

    reward = utils_env.get_reward(loss, adjusted_loss, self.version.reward)
    terminal = self._terminal()
    data_loss = None
    if terminal:
        data_loss = self.model.get_loss()

    past_grads = self.history['gradients']
    info = {
        'loss': data_loss,
        'batch_loss': loss,
        'weights_mean': np.mean(np.abs(new_weights)),
        'weights_sum': np.sum(np.abs(new_weights)),
        'actions_mean': np.mean(action),
        'actions_std': np.std(action),
        'states_mean': np.mean(np.abs(state)),
        'states_sum': np.sum(np.abs(state)),
        'grads_mean': np.mean(self.history['gradients']),
        'grads_sum': np.sum(self.history['gradients']),
        'loss_mean': np.mean(self.history['losses']),
        'adjusted_loss': float(adjusted_loss),
        'adjusted_grad': np.mean(np.abs(adjusted_grad)),
        'grad_diff': np.mean(np.abs(past_grads[0] - past_grads[1])),
    }
    return states, reward, terminal, info
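# The version-0 branch above maps a raw action a to sign(a) * 10**(|a| - 3),
# so the action magnitude selects an order of magnitude for the update and
# the sign selects its direction. A standalone check of that mapping:
def _demo_log_scale_action():
    action = np.array([-2.0, 0.0, 1.0, 3.0])
    mapped = np.sign(action) * 10**(np.abs(action) - 3)
    # -2 -> -1e-1, 0 -> 0 (sign(0) == 0), 1 -> 1e-2, 3 -> 1e0
    assert np.allclose(mapped, [-0.1, 0.0, 0.01, 1.0])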