def log_diagnostics(self):
        """Record per-agent path returns, probe Q-values and run counters.

        For every agent index in ``range(self.agent_num)`` this records the
        max/mean/last path return of that agent and the value returned by
        that agent's ``get_critic_value`` for four fixed probe inputs. It
        then records the global episode and sample counters. Everything is
        written through ``tabular.record``.
        """
        # Fixed critic probe inputs, paired with the tabular key template they
        # are logged under.
        # NOTE(review): each row looks like [observation, one-hot action A,
        # one-hot action B] for a two-agent, two-action game; confirm against
        # the critic's expected input layout, in particular before running
        # with a different number of agents or actions.
        probes = (
            ('Q-value-0-0_{}', [1, 1, 0, 1, 0]),
            ('Q-value-0-1_{}', [1, 1, 0, 0, 1]),
            ('Q-value-1-0_{}', [1, 0, 1, 1, 0]),
            ('Q-value-1-1_{}', [1, 0, 1, 0, 1]),
        )

        for i in range(self.agent_num):
            tabular.record('max-path-return_agent_{}'.format(i), self._max_path_return[i])
            tabular.record('mean-path-return_agent_{}'.format(i), self._mean_path_return[i])
            tabular.record('last-path-return_agent_{}'.format(i), self._last_path_return[i])

            # Query this agent's own critic rather than a hard-coded pair of
            # agents, so the loop stays in bounds for any agent_num.
            agent = self.agents[i]
            for key_template, probe in probes:
                # Wrap the probe in an outer list to pass a batch of one row.
                q_value = agent.get_critic_value(np.array([probe]))
                tabular.record(key_template.format(i), q_value)

        tabular.record('episodes', self._n_episodes)
        # NOTE(review): 'episode_reward' is fed the episode count, the same
        # value as 'episodes' above; this looks like a copy-paste slip.
        # Confirm which attribute was intended before changing it.
        tabular.record('episode_reward', self._n_episodes)
        tabular.record('total-samples', self._total_samples)
 def log_diagnostics(self):
     """Record per-agent path-return statistics and global run counters.

     Writes the max, mean and last path return for each agent index in
     ``range(self.agent_num)``, followed by the episode and total-sample
     counters, all through ``tabular.record``.
     """
     for agent_idx in range(self.agent_num):
         # Key template paired with the per-agent sequence it reads from;
         # the order here is the order the records are emitted in.
         per_agent_stats = (
             ('max-path-return_agent_{}', self._max_path_return),
             ('mean-path-return_agent_{}', self._mean_path_return),
             ('last-path-return_agent_{}', self._last_path_return),
         )
         for key_template, returns in per_agent_stats:
             tabular.record(key_template.format(agent_idx), returns[agent_idx])

     # NOTE(review): 'episode_reward' receives the episode count, the same
     # value as 'episodes'; confirm this is intended.
     episode_count = self._n_episodes
     tabular.record('episodes', episode_count)
     tabular.record('episode_reward', episode_count)
     tabular.record('total-samples', self._total_samples)