def test_project_log_stochastic_matrix_wrt_kl_divergence(self):
  """Tests KL-divergence projection routine on some known values.

  Feeds the log of a known matrix through
  `_project_log_stochastic_matrix_wrt_kl_divergence`, exponentiates the
  result, and checks it against a hand-computed left-stochastic projection
  (each column sums to one).
  """
  matrix = standard_ops.constant([[0.2, 0.8, 0.6], [0.1, 0.2, 1.5],
                                  [0.2, 1.0, 0.9]])
  expected_projected_matrix = np.array([[0.4, 0.4, 0.2], [0.2, 0.1, 0.5],
                                        [0.4, 0.5, 0.3]])
  # `test_session()` is deprecated; `cached_session()` is the supported
  # replacement (and matches the other tests in this file).
  with self.cached_session() as session:
    projected_matrix = session.run(
        standard_ops.exp(
            swap_regret_optimizer.
            _project_log_stochastic_matrix_wrt_kl_divergence(
                standard_ops.log(matrix))))
  self.assertAllClose(
      expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6)
def test_project_log_stochastic_matrix_wrt_kl_divergence(self):
  """Tests KL-divergence projection routine on some known values.

  Runs log(matrix) through the projection, exponentiates the result, and
  compares against a precomputed left-stochastic matrix (columns sum to 1).
  """
  input_matrix = standard_ops.constant([[0.2, 0.8, 0.6],
                                        [0.1, 0.2, 1.5],
                                        [0.2, 1.0, 0.9]])
  expected = np.array([[0.4, 0.4, 0.2],
                       [0.2, 0.1, 0.5],
                       [0.4, 0.5, 0.3]])
  # The routine operates in log-space, so log on the way in, exp on the
  # way out.
  log_projection = (
      swap_regret_optimizer._project_log_stochastic_matrix_wrt_kl_divergence(
          standard_ops.log(input_matrix)))
  projection = standard_ops.exp(log_projection)
  with self.cached_session() as session:
    actual = session.run(projection)
  self.assertAllClose(expected, actual, rtol=0, atol=1e-6)
def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix):
  """Projects its argument onto the set of log-left-stochastic matrices.

  Args:
    log_matrix: 2d square tensor, the element-wise logarithm of the matrix to
      project.

  Returns:
    The 2d square tensor that results from projecting exp(`matrix`) onto the
    set of left-stochastic matrices w.r.t. the KL-divergence applied
    column-wise.
  """
  # Shift each column so its largest element is zero; this keeps the
  # subsequent exp() numerically well-behaved.
  column_maxima = standard_ops.reduce_max(log_matrix, axis=0, keepdims=True)
  shifted = log_matrix - column_maxima
  # Subtracting the log of the column sums normalizes each column of
  # exp(shifted) to sum to one, i.e. the log-softmax over axis 0.
  log_column_sums = standard_ops.log(
      standard_ops.reduce_sum(standard_ops.exp(shifted), axis=0,
                              keepdims=True))
  return shifted - log_column_sums
def _project_log_stochastic_matrix_wrt_kl_divergence(log_matrix):
  """Projects its argument onto the set of log-left-stochastic matrices.

  Args:
    log_matrix: 2d square tensor, the element-wise logarithm of the matrix to
      project.

  Returns:
    The 2d square tensor that results from projecting exp(`matrix`) onto the
    set of left-stochastic matrices w.r.t. the KL-divergence applied
    column-wise.
  """
  # For numerical reasons, make sure that the largest matrix element is zero
  # before exponentiating.
  # NOTE: `keep_dims` is the deprecated TF1 spelling of this argument and was
  # removed in TF2; use `keepdims` (consistent with the rest of this file).
  log_matrix -= standard_ops.reduce_max(log_matrix, axis=0, keepdims=True)
  log_matrix -= standard_ops.log(
      standard_ops.reduce_sum(
          standard_ops.exp(log_matrix), axis=0, keepdims=True))
  return log_matrix
def _stochastic_matrix(self, state):
  """Converts the internal log-space state into a stochastic matrix.

  Args:
    state: tensor holding the element-wise logarithm of the matrix.

  Returns:
    The element-wise exponential of `state`.
  """
  matrix = standard_ops.exp(state)
  return matrix