def test_get_observation_v3():
    '''Test the get_observation function with version 3.'''
    history = create_random_history()
    loss, grad, weights = utils.get_observation(history, version=3)
    assert isinstance(loss, float)
    assert isinstance(grad, np.ndarray)
    assert isinstance(weights, np.ndarray)
    assert len(grad) == len(weights)
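# The tests here rely on a create_random_history() fixture that is not shown
# in this section. A minimal sketch of what it might look like, assuming the
# history behaves like a mapping with 'losses', 'gradients', and 'weights'
# keys (the same keys base_step appends below); the real History type and
# this helper's shape are assumptions, not the project's actual fixture:
def create_random_history(num_steps=5, num_params=8):
    '''Hypothetical helper: random training data keyed like the env history.'''
    rng = np.random.default_rng(0)
    return {
        'losses': [float(l) for l in rng.random(num_steps)],
        'gradients': [rng.standard_normal(num_params)
                      for _ in range(num_steps)],
        'weights': [rng.standard_normal(num_params)
                    for _ in range(num_steps)],
    }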
def base_step(self, action):
    # Flatten the per-agent actions into a single vector.
    action = np.reshape([
        action[self.AGENT_FMT.format(i)].ravel()
        for i in range(self.model.size)
    ], (-1,))
    grad = self.model.get_gradient()
    # Map raw agent outputs to learning rates and apply a gradient step.
    action = utils_env.get_action_optlrs(action, self.version.action)
    self.model.set_parameters(self.model.parameters - grad * action)

    grad, loss, weights = self.model.get()
    self.history.append(losses=loss, gradients=grad, weights=weights)

    # Build the normalized observation from the raw history.
    adj_loss, adj_wght, adj_grad = utils_env.get_observation(
        self.history, self.version.observation)
    self.adjusted_history.append(weights=adj_wght,
                                 losses=adj_loss,
                                 gradients=adj_grad)
    state = self.adjusted_history.build_multistate()
    states = {
        self.AGENT_FMT.format(i):
        np.clip(np.nan_to_num(list(v)), -BOUNDS, BOUNDS) - 1
        for i, v in enumerate(state)
    }

    reward = utils_env.get_reward(loss, adj_loss, self.version.reward)
    reward = np.clip(reward, -BOUNDS, BOUNDS)

    terminal = self._terminal()
    # Terminate early on divergence and penalize the remaining steps.
    if not terminal and loss > 1e4:
        terminal = True
        reward -= (self.max_batches - self.current_step)

    final_loss = None
    if terminal:
        final_loss = self.model.get_loss()

    past_grads = self.history['gradients']
    info = {
        'loss': final_loss,
        'batch_loss': loss,
        'weights_mean': np.mean(np.abs(weights)),
        'weights_sum': np.sum(np.abs(weights)),
        'actions_mean': np.mean(action),
        'actions_std': np.std(action),
        'states_mean': np.mean(np.abs(state)),
        'states_sum': np.sum(np.abs(state)),
        'grads_mean': np.mean(self.history['gradients']),
        'grads_sum': np.sum(self.history['gradients']),
        'loss_mean': np.mean(self.history['losses']),
        'adjusted_loss': float(adj_loss),
        'adjusted_grad': np.mean(np.abs(adj_grad)),
        'grad_diff': np.mean(np.abs(past_grads[0] - past_grads[1])),
    }
    self.model.next()
    return states, reward, terminal, info
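# The state normalization above chains np.nan_to_num and np.clip before
# shifting by -1. A standalone illustration of that numpy behavior; the
# BOUNDS value here is arbitrary, for demonstration only:
def _demo_state_clipping():
    BOUNDS = 10.0
    raw = np.array([np.nan, np.inf, -np.inf, 2.5])
    # nan -> 0, +/-inf -> huge finite values, then clipped to [-BOUNDS, BOUNDS]
    clipped = np.clip(np.nan_to_num(raw), -BOUNDS, BOUNDS) - 1
    assert np.array_equal(clipped, np.array([-1.0, 9.0, -11.0, 1.5]))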
def test_get_observation_not_a_version():
    '''Test the get_observation function with an invalid version.'''
    history = create_random_history()
    with pytest.raises(RuntimeError):
        utils.get_observation(history, version=100)
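# A possible extension using standard pytest parametrization to cover several
# invalid versions at once; only version=100 is exercised by the original
# test, so the other values below are assumptions about what is invalid:
@pytest.mark.parametrize('bad_version', [100, 101, 999])
def test_get_observation_invalid_versions(bad_version):
    '''Sketch: get_observation should reject any unknown version.'''
    history = create_random_history()
    with pytest.raises(RuntimeError):
        utils.get_observation(history, version=bad_version)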
def base_step(self, action):
    # Flatten the per-agent actions into a single vector.
    action = np.reshape([
        action[MultiOptimize.AGENT_FMT.format(i)]
        for i in range(self.model.size)
    ], (-1,))

    # Map raw agent outputs to parameter updates.
    if self.version.action == 0:
        # Log-scale mapping: |a| is a base-10 exponent offset by -3.
        sign = np.sign(action)
        mag = np.abs(action) - 3
        action = sign * 10**mag
    elif self.version.action == 1:
        action = action * 1e-3
    else:
        raise RuntimeError()

    self.model.set_parameters(self.model.parameters - action)
    grad, loss, new_weights = self.model.get()
    self.history.append(losses=loss, gradients=grad, weights=new_weights)

    # Build the normalized observation from the raw history.
    adjusted = utils_env.get_observation(self.history,
                                         self.version.observation)
    adjusted_loss, adjusted_wght, adjusted_grad = adjusted

    # Record only the fields the configured history version tracks.
    if self.version.history in (0, 4):
        self.adjusted_history.append(gradients=adjusted_grad)
    elif self.version.history == 1:
        self.adjusted_history.append(losses=adjusted_loss,
                                     gradients=adjusted_grad)
    elif self.version.history in (2, 3):
        self.adjusted_history.append(weights=adjusted_wght,
                                     losses=adjusted_loss,
                                     gradients=adjusted_grad)
    elif self.version.history == 5:
        self.adjusted_history.append(losses=adjusted_loss,
                                     gradients=adjusted_grad,
                                     actions=action)
    else:
        raise RuntimeError()

    state = self.adjusted_history.build_multistate()
    states = {
        MultiOptimize.AGENT_FMT.format(i): list(v)
        for i, v in enumerate(state)
    }

    reward = utils_env.get_reward(loss, adjusted_loss, self.version.reward)
    terminal = self._terminal()
    data_loss = None
    if terminal:
        data_loss = self.model.get_loss()

    past_grads = self.history['gradients']
    info = {
        'loss': data_loss,
        'batch_loss': loss,
        'weights_mean': np.mean(np.abs(new_weights)),
        'weights_sum': np.sum(np.abs(new_weights)),
        'actions_mean': np.mean(action),
        'actions_std': np.std(action),
        'states_mean': np.mean(np.abs(state)),
        'states_sum': np.sum(np.abs(state)),
        'grads_mean': np.mean(self.history['gradients']),
        'grads_sum': np.sum(self.history['gradients']),
        'loss_mean': np.mean(self.history['losses']),
        'adjusted_loss': float(adjusted_loss),
        'adjusted_grad': np.mean(np.abs(adjusted_grad)),
        'grad_diff': np.mean(np.abs(past_grads[0] - past_grads[1])),
    }
    return states, reward, terminal, info
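# The version-0 branch above maps a raw action a to sign(a) * 10**(|a| - 3),
# so the action magnitude selects an order of magnitude for the update and
# the sign selects its direction. A standalone check of that mapping:
def _demo_log_scale_action():
    action = np.array([-2.0, 0.0, 1.0, 3.0])
    mapped = np.sign(action) * 10**(np.abs(action) - 3)
    # -2 -> -1e-1, 0 -> 0 (sign(0) == 0), 1 -> 1e-2, 3 -> 1e0
    assert np.allclose(mapped, [-0.1, 0.0, 0.01, 1.0])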