def testGroup_MultiDevice(self):
  with ops.Graph().as_default() as g:
    with g.device("/task:0"):
      a = tf.constant(0, name="a")
      b = tf.constant(0, name="b")
    with g.device("/task:1"):
      c = tf.constant(0, name="c")
      d = tf.constant(0, name="d")
    with g.device("/task:2"):
      tf.group(a.op, b.op, c.op, d.op, name="root")
  gd = g.as_graph_def()
  self.assertProtoEquals("""
    node { name: "a" op: "Const" device: "/task:0"}
    node { name: "b" op: "Const" device: "/task:0"}
    node { name: "c" op: "Const" device: "/task:1"}
    node { name: "d" op: "Const" device: "/task:1"}
    node { name: "root/NoOp" op: "NoOp" input: "^a" input: "^b"
           device: "/task:0" }
    node { name: "root/NoOp_1" op: "NoOp" input: "^c" input: "^d"
           device: "/task:1" }
    node { name: "root" op: "NoOp" input: "^root/NoOp" input: "^root/NoOp_1"
           device: "/task:2" }
  """, self._StripGraph(gd))
def testIndexedSlicesGradientInCondInWhileLoop(self):
  with ops.Graph().as_default():
    embedding_matrix = tf.get_variable(
        "embedding_matrix", [5, 5],
        initializer=tf.random_normal_initializer())

    def Cond(it, _):
      return it < 5

    def Body(it, cost):
      embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
      cost = tf.cond(tf.equal(it, 3),
                     lambda: tf.square(cost),
                     lambda: cost + tf.reduce_sum(embedding))
      return it + 1, cost

    _, cost = control_flow_ops.While(
        Cond, Body, [tf.constant(0), tf.constant(0.0)])

    dynamic_grads = tf.gradients(cost, [embedding_matrix])[0]
    dynamic_grads = tf.segment_sum(dynamic_grads.values,
                                   dynamic_grads.indices)

    # The static expression below unrolls the loop by hand: the embedding sum
    # is accumulated at iterations 0-2, the cost is squared at iteration 3,
    # and one more embedding sum is added at iteration 4.
    embedding = embedding_ops.embedding_lookup(embedding_matrix, [0])
    static = tf.square(
        tf.reduce_sum(embedding) +
        tf.reduce_sum(embedding) +
        tf.reduce_sum(embedding)) + tf.reduce_sum(embedding)
    static_grads = tf.gradients(static, [embedding_matrix])[0]
    static_grads = tf.segment_sum(static_grads.values, static_grads.indices)

    with self.test_session() as sess:
      sess.run(tf.initialize_all_variables())
      self.assertAllEqual(*sess.run([static_grads, dynamic_grads]))
def test_maximum_eigenvector_power_method(self):
  """Tests power method routine on some known left-stochastic matrices."""
  matrix1 = np.matrix([[0.6, 0.1, 0.1], [0.0, 0.6, 0.9], [0.4, 0.3, 0.0]])
  matrix2 = np.matrix([[0.4, 0.4, 0.2], [0.2, 0.1, 0.5], [0.4, 0.5, 0.3]])

  with self.cached_session() as session:
    eigenvector1 = session.run(
        swap_regret_optimizer._maximal_eigenvector_power_method(
            standard_ops.constant(matrix1)))
    eigenvector2 = session.run(
        swap_regret_optimizer._maximal_eigenvector_power_method(
            standard_ops.constant(matrix2)))

  # Check that eigenvector1 and eigenvector2 are eigenvectors of matrix1 and
  # matrix2 (respectively) with associated eigenvalue 1.
  matrix_eigenvector1 = np.tensordot(matrix1, eigenvector1, axes=1)
  matrix_eigenvector2 = np.tensordot(matrix2, eigenvector2, axes=1)
  self.assertAllClose(eigenvector1, matrix_eigenvector1, rtol=0, atol=1e-6)
  self.assertAllClose(eigenvector2, matrix_eigenvector2, rtol=0, atol=1e-6)
def testShape(self):
  with ops.Graph().as_default():
    tensor = tf.constant([1.0, 2.0])
    self.assertEquals([2], tensor.get_shape())
    self.assertEquals([2],
                      control_flow_ops.with_dependencies(
                          [tf.constant(1.0)], tensor).get_shape())
def testIndexedSlicesWithDenseShape(self):
  with self.test_session():
    data = ops.IndexedSlices(
        tf.constant([1, 2, 3]),
        tf.constant([0, 1]),
        dense_shape=tf.constant([3]))
    zero = tf.constant(0)
    one = tf.constant(1)
    less_op = tf.less(zero, one)
    switch_false, switch_true = control_flow_ops.switch(data, less_op)
    self.assertAllEqual([1, 2, 3], switch_true.values.eval())
    self.assertAllEqual([0, 1], switch_true.indices.eval())
def testGroup_OneDevice(self):
  with ops.Graph().as_default() as g:
    with g.device("/task:0"):
      a = tf.constant(0, name="a")
      b = tf.constant(0, name="b")
    tf.group(a.op, b.op, name="root")
  gd = g.as_graph_def()
  self.assertProtoEquals("""
    node { name: "a" op: "Const" device: "/task:0" }
    node { name: "b" op: "Const" device: "/task:0" }
    node { name: "root" op: "NoOp" input: "^a" input: "^b"
           device: "/task:0" }
  """, self._StripGraph(gd))
def testCondContext(self):
  with self.test_session() as sess:
    x = tf.constant(2)
    y = tf.constant(5)
    control_flow_ops.cond(tf.less(x, y),
                          lambda: tf.mul(x, 17),
                          lambda: tf.add(y, 23))
    for op in sess.graph.get_operations():
      c = op._get_control_flow_context()
      if c:
        compare.ProtoEq(
            c.to_proto(),
            control_flow_ops.CondContext.from_proto(c.to_proto()).to_proto())
def testGroup_NoDevices(self):
  with ops.Graph().as_default() as g:
    a = tf.constant(0, name="a")
    b = tf.constant(0, name="b")
    c = tf.constant(0, name="c")
    tf.group(a.op, b.op, c.op, name="root")
  gd = g.as_graph_def()
  self.assertProtoEquals("""
    node { name: "a" op: "Const"}
    node { name: "b" op: "Const"}
    node { name: "c" op: "Const"}
    node { name: "root" op: "NoOp" input: "^a" input: "^b" input: "^c" }
  """, self._StripGraph(gd))
def testIndexedSlicesWithDynamicShapeGradientInWhileLoop(self):
  for dtype in [dtypes.float32, dtypes.float64]:
    with self.test_session() as sess:
      inputs = tf.placeholder(dtype=dtype)
      initial_outputs = tf.TensorArray(dtype=dtype, dynamic_size=True,
                                       size=1)
      initial_i = tf.constant(0, dtype=dtypes.int32)

      def Cond(i, _):
        return i < tf.size(inputs)  # pylint: disable=cell-var-from-loop

      def Body(i, outputs):
        x = tf.gather(inputs, i)  # pylint: disable=cell-var-from-loop
        outputs = outputs.write(i, x)
        return i + 1, outputs

      _, outputs = tf.while_loop(Cond, Body, [initial_i, initial_outputs])

      outputs = tf.reduce_sum(outputs.pack())
      r = tf.gradients([outputs], [inputs])[0]
      grad_wr_inputs = ops.convert_to_tensor(r)
      o, grad = sess.run([outputs, grad_wr_inputs],
                         feed_dict={inputs: [1, 3, 2]})
      self.assertEquals(o, 6)
      self.assertAllEqual(grad, [1] * 3)
def testIndexedSlicesWithShapeGradientInWhileLoop(self):
  with self.test_session() as sess:
    num_steps = 9

    inputs = tf.placeholder(dtype="float32", shape=[num_steps])
    initial_outputs = tf.TensorArray(dtype="float32", size=num_steps)
    initial_i = tf.constant(0, dtype="int32")

    def Cond(i, _):
      return i < num_steps

    def Body(i, outputs):
      x = tf.gather(inputs, i)
      outputs = outputs.write(i, x)
      return i + 1, outputs

    _, outputs = tf.while_loop(Cond, Body, [initial_i, initial_outputs])

    outputs = tf.reduce_sum(outputs.pack())
    r = tf.gradients([outputs], [inputs])[0]
    grad_wr_inputs = ops.convert_to_tensor(r)
    o, grad = sess.run([outputs, grad_wr_inputs],
                       feed_dict={inputs: [4, 6, 0, 7, 0, 0, 1, 2, 0]})
    self.assertEquals(o, 20)
    self.assertAllEqual(grad, [1] * num_steps)
def _maximal_eigenvector_power_method(matrix,
                                      epsilon=1e-6,
                                      maximum_iterations=100):
  """Returns the maximal right-eigenvector of `matrix` using the power method.

  Args:
    matrix: 2D Tensor, the matrix of which we will find the maximal
      right-eigenvector.
    epsilon: positive float, if two iterations of the power method differ (in
      L2 norm) by no more than epsilon, we will terminate.
    maximum_iterations: positive int, if we perform this many iterations, we
      will terminate.

  Returns:
    The maximal right-eigenvector of `matrix`.

  Raises:
    ValueError: If the `matrix` tensor is not floating-point, or if the
      `epsilon` or `maximum_iterations` parameters violate their bounds.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  if epsilon <= 0.0:
    raise ValueError("epsilon must be strictly positive")
  if maximum_iterations <= 0:
    raise ValueError("maximum_iterations must be strictly positive")

  def while_loop_condition(iteration, eigenvector, old_eigenvector):
    """Returns false if the while loop should terminate."""
    not_done = (iteration < maximum_iterations)
    not_converged = (standard_ops.norm(eigenvector - old_eigenvector) >
                     epsilon)
    return standard_ops.logical_and(not_done, not_converged)

  def while_loop_body(iteration, eigenvector, old_eigenvector):
    """Performs one iteration of the power method."""
    del old_eigenvector  # Needed by the condition, but not the body.
    iteration += 1
    # We need to use tf.matmul() and tf.expand_dims(), instead of
    # tf.tensordot(), since the former will infer the shape of the result,
    # while the latter will not (tf.while_loop() needs the shapes).
    new_eigenvector = standard_ops.matmul(
        matrix, standard_ops.expand_dims(eigenvector, 1))[:, 0]
    new_eigenvector /= standard_ops.norm(new_eigenvector)
    return (iteration, new_eigenvector, eigenvector)

  iteration = standard_ops.constant(0)
  eigenvector = standard_ops.ones_like(matrix[:, 0])
  eigenvector /= standard_ops.norm(eigenvector)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
  iteration, eigenvector, old_eigenvector = while_loop_body(
      iteration, eigenvector, eigenvector)
  iteration, eigenvector, old_eigenvector = control_flow_ops.while_loop(
      while_loop_condition,
      while_loop_body,
      loop_vars=(iteration, eigenvector, old_eigenvector),
      name="power_method")

  return eigenvector
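# A minimal NumPy sketch of the same power-method iteration, useful for
# checking the graph-mode implementation above against eager arithmetic. The
# helper name `power_method` and the use of plain NumPy arrays (rather than
# standard_ops tensors) are illustrative assumptions, not part of the library.
import numpy as np

def power_method(matrix, epsilon=1e-6, maximum_iterations=100):
  """Hypothetical NumPy analogue of _maximal_eigenvector_power_method()."""
  eigenvector = np.ones(matrix.shape[0])
  eigenvector /= np.linalg.norm(eigenvector)
  for _ in range(maximum_iterations):
    old_eigenvector = eigenvector
    eigenvector = matrix.dot(eigenvector)
    eigenvector /= np.linalg.norm(eigenvector)
    if np.linalg.norm(eigenvector - old_eigenvector) <= epsilon:
      break
  return eigenvector

# For a left-stochastic matrix the maximal eigenvalue is 1, so the result v
# should satisfy matrix.dot(v) ~= v, which is exactly what the unit tests
# above assert.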
def _initial_state(self, num_constraints):
  # For a MultiplicativeSwapRegretOptimizer, the internal state is a tensor
  # of shape (m+1, m+1), where m is the number of constraints, representing
  # the element-wise logarithm of a left-stochastic matrix.
  dimension = num_constraints + 1
  # Initialize by putting as much weight as possible on the objective, and
  # as little as possible on the constraints.
  log_initial_one = math.log(1.0 - (self._initial_multiplier_radius *
                                    (dimension - 1) / dimension))
  log_initial_zero = math.log(self._initial_multiplier_radius / dimension)
  return standard_ops.concat(
      (standard_ops.constant(
          log_initial_one, dtype=dtypes.float32, shape=(1, dimension)),
       standard_ops.constant(
           log_initial_zero,
           dtype=dtypes.float32,
           shape=(dimension - 1, dimension))),
      axis=0)
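# A worked example of the initialization above, assuming num_constraints=2
# and _initial_multiplier_radius=0.3 (illustrative values, not defaults taken
# from the library). With dimension = 3, the objective row gets weight
# 1 - 0.3 * 2/3 = 0.8 and each constraint row gets weight 0.3 / 3 = 0.1, so
# every column of exp(state) sums to 0.8 + 0.1 + 0.1 = 1.0, i.e. the state is
# the element-wise log of a left-stochastic matrix, as the comment requires.
import math
import numpy as np

dimension = 3  # num_constraints + 1
radius = 0.3   # hypothetical _initial_multiplier_radius
log_initial_one = math.log(1.0 - radius * (dimension - 1) / dimension)
log_initial_zero = math.log(radius / dimension)
state = np.concatenate((np.full((1, dimension), log_initial_one),
                        np.full((dimension - 1, dimension), log_initial_zero)))
assert np.allclose(np.exp(state).sum(axis=0), 1.0)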
def testIndexedSlicesGradient(self):
  with ops.Graph().as_default():
    embedding_matrix = tf.get_variable(
        "embedding_matrix", [5, 5],
        initializer=tf.random_normal_initializer())

    def Cond(it, _):
      return it < 5

    def Body(it, cost):
      embedding = embedding_ops.embedding_lookup(embedding_matrix + 0.0, [0])
      cost += tf.reduce_sum(embedding)
      return it + 1, cost

    _, cost = control_flow_ops.While(
        Cond, Body, [tf.constant(0), tf.constant(0.0)])
    optimizer = momentum.MomentumOptimizer(0.1, 0.9)
    train_op = optimizer.minimize(cost)
    with self.test_session() as sess:
      sess.run(tf.initialize_all_variables())
      for _ in range(10):
        sess.run([train_op])
def testWhileContext(self):
  with self.test_session() as sess:
    i = tf.constant(0)
    c = lambda i: tf.less(i, 10)
    b = lambda i: tf.add(i, 1)
    tf.while_loop(c, b, [i])
    for op in sess.graph.get_operations():
      c = op._get_control_flow_context()
      if c:
        compare.ProtoEq(
            c.to_proto(),
            control_flow_ops.WhileContext.from_proto(c.to_proto()).to_proto())
def __init__(self, constraints):
  """Constructs a new `ConstantMinimizationProblem`.

  Args:
    constraints: 1d numpy array, the constant constraint violations.

  Returns:
    A new `ConstantMinimizationProblem`.
  """
  # We make a fake 1-parameter linear objective so that we don't get a "no
  # variables to optimize" error.
  self._objective = standard_ops.Variable(0.0, dtype=dtypes.float32)
  self._constraints = standard_ops.constant(constraints, dtype=dtypes.float32)
def test_project_stochastic_matrix_wrt_euclidean_norm(self):
  """Tests Euclidean projection routine on some known values."""
  matrix = standard_ops.constant([[-0.1, -0.1, 0.4],
                                  [-0.8, 0.4, 1.2],
                                  [-0.3, 0.1, 0.2]])
  expected_projected_matrix = np.array([[0.6, 0.1, 0.1],
                                        [0.0, 0.6, 0.9],
                                        [0.4, 0.3, 0.0]])

  with self.cached_session() as session:
    projected_matrix = session.run(
        swap_regret_optimizer._project_stochastic_matrix_wrt_euclidean_norm(
            matrix))

  self.assertAllClose(
      expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6)
def test_project_multipliers_wrt_euclidean_norm(self):
  """Tests Euclidean projection routine on some known values."""
  multipliers1 = standard_ops.constant([-0.1, -0.6, -0.3])
  expected_projected_multipliers1 = np.array([0.0, 0.0, 0.0])

  multipliers2 = standard_ops.constant([-0.1, 0.6, 0.3])
  expected_projected_multipliers2 = np.array([0.0, 0.6, 0.3])

  multipliers3 = standard_ops.constant([0.4, 0.7, -0.2, 0.5, 0.1])
  expected_projected_multipliers3 = np.array([0.2, 0.5, 0.0, 0.3, 0.0])

  with self.test_session() as session:
    projected_multipliers1 = session.run(
        external_regret_optimizer._project_multipliers_wrt_euclidean_norm(
            multipliers1, 1.0))
    projected_multipliers2 = session.run(
        external_regret_optimizer._project_multipliers_wrt_euclidean_norm(
            multipliers2, 1.0))
    projected_multipliers3 = session.run(
        external_regret_optimizer._project_multipliers_wrt_euclidean_norm(
            multipliers3, 1.0))

  self.assertAllClose(
      expected_projected_multipliers1,
      projected_multipliers1,
      rtol=0,
      atol=1e-6)
  self.assertAllClose(
      expected_projected_multipliers2,
      projected_multipliers2,
      rtol=0,
      atol=1e-6)
  self.assertAllClose(
      expected_projected_multipliers3,
      projected_multipliers3,
      rtol=0,
      atol=1e-6)
def test_project_log_stochastic_matrix_wrt_kl_divergence(self):
  """Tests KL-divergence projection routine on some known values."""
  matrix = standard_ops.constant([[0.2, 0.8, 0.6],
                                  [0.1, 0.2, 1.5],
                                  [0.2, 1.0, 0.9]])
  expected_projected_matrix = np.array([[0.4, 0.4, 0.2],
                                        [0.2, 0.1, 0.5],
                                        [0.4, 0.5, 0.3]])

  with self.cached_session() as session:
    projected_matrix = session.run(
        standard_ops.exp(
            swap_regret_optimizer.
            _project_log_stochastic_matrix_wrt_kl_divergence(
                standard_ops.log(matrix))))

  self.assertAllClose(
      expected_projected_matrix, projected_matrix, rtol=0, atol=1e-6)
def _project_multipliers_wrt_euclidean_norm(multipliers, radius):
  """Projects its argument onto the feasible region.

  The feasible region is the set of all vectors with nonnegative elements that
  sum to at most `radius`.

  Args:
    multipliers: 1d tensor, the Lagrange multipliers to project.
    radius: float, the radius of the feasible region.

  Returns:
    The 1d tensor that results from projecting `multipliers` onto the feasible
    region w.r.t. the Euclidean norm.

  Raises:
    ValueError: if the `multipliers` tensor is not floating-point, does not
      have a fully-known shape, or is not one-dimensional.
  """
  if not multipliers.dtype.is_floating:
    raise ValueError("multipliers must have a floating-point dtype")
  multipliers_shape = multipliers.get_shape()
  if multipliers_shape.ndims is None:
    raise ValueError("multipliers must have known shape")
  if multipliers_shape.ndims != 1:
    raise ValueError(
        "multipliers must be one dimensional (instead is %d-dimensional)" %
        multipliers_shape.ndims)
  dimension = multipliers_shape[0].value
  if dimension is None:
    raise ValueError("multipliers must have fully-known shape")

  def while_loop_condition(iteration, multipliers, inactive, old_inactive):
    """Returns false if the while loop should terminate."""
    del multipliers  # Needed by the body, but not the condition.
    not_done = (iteration < dimension)
    not_converged = standard_ops.reduce_any(
        standard_ops.not_equal(inactive, old_inactive))
    return standard_ops.logical_and(not_done, not_converged)

  def while_loop_body(iteration, multipliers, inactive, old_inactive):
    """Performs one iteration of the projection."""
    del old_inactive  # Needed by the condition, but not the body.
    iteration += 1
    scale = standard_ops.minimum(
        0.0, (radius - standard_ops.reduce_sum(multipliers)) /
        standard_ops.maximum(1.0, standard_ops.reduce_sum(inactive)))
    multipliers = multipliers + (scale * inactive)
    new_inactive = standard_ops.cast(multipliers > 0, multipliers.dtype)
    multipliers = multipliers * new_inactive
    return (iteration, multipliers, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(multipliers, dtype=multipliers.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
  iteration, multipliers, inactive, old_inactive = while_loop_body(
      iteration, multipliers, inactive, inactive)
  iteration, multipliers, inactive, old_inactive = control_flow_ops.while_loop(
      while_loop_condition,
      while_loop_body,
      loop_vars=(iteration, multipliers, inactive, old_inactive),
      name="euclidean_projection")

  return multipliers
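# A minimal NumPy sketch of the same projection (the helper name
# `project_multipliers` is illustrative, not part of the library). Each pass
# shifts the still-positive ("inactive") coordinates by a common amount so
# the sum moves toward `radius`, then clips negatives to zero; the inactive
# set stabilizes in at most `dimension` passes, matching the do-while
# structure above.
import numpy as np

def project_multipliers(multipliers, radius):
  """Hypothetical NumPy analogue of _project_multipliers_wrt_euclidean_norm()."""
  multipliers = np.array(multipliers, dtype=float)
  inactive = np.ones_like(multipliers)
  for _ in range(multipliers.size + 1):
    old_inactive = inactive
    scale = min(0.0,
                (radius - multipliers.sum()) / max(1.0, inactive.sum()))
    multipliers += scale * inactive
    inactive = (multipliers > 0).astype(float)
    multipliers *= inactive
    if np.array_equal(inactive, old_inactive):
      break
  return multipliers

# Reproduces the third case in the unit test above:
# project_multipliers([0.4, 0.7, -0.2, 0.5, 0.1], 1.0)
#   -> [0.2, 0.5, 0.0, 0.3, 0.0]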
def _project_stochastic_matrix_wrt_euclidean_norm(matrix):
  """Projects its argument onto the set of left-stochastic matrices.

  This algorithm is O(n^3) at worst, where `matrix` is n*n. It can be done in
  O(n^2 * log(n)) time by sorting each column (and maybe better with a
  different algorithm), but the algorithm implemented here is easier to
  implement in TensorFlow.

  Args:
    matrix: 2d square tensor, the matrix to project.

  Returns:
    The 2d square tensor that results from projecting `matrix` onto the set
    of left-stochastic matrices w.r.t. the Euclidean norm applied column-wise
    (i.e. the Frobenius norm).

  Raises:
    ValueError: if the `matrix` tensor is not floating-point, does not have a
      fully-known shape, or is not two-dimensional and square.
  """
  if not matrix.dtype.is_floating:
    raise ValueError("matrix must have a floating-point dtype")
  matrix_shape = matrix.get_shape()
  if matrix_shape.ndims is None:
    raise ValueError("matrix must have known shape")
  if matrix_shape.ndims != 2:
    raise ValueError(
        "matrix must be two dimensional (instead is %d-dimensional)" %
        matrix_shape.ndims)
  if matrix_shape[0] != matrix_shape[1]:
    raise ValueError("matrix must be square (instead has shape (%d,%d))" %
                     (matrix_shape[0], matrix_shape[1]))
  dimension = matrix_shape[0].value
  if dimension is None:
    raise ValueError("matrix must have fully-known shape")

  def while_loop_condition(iteration, matrix, inactive, old_inactive):
    """Returns false if the while loop should terminate."""
    del matrix  # Needed by the body, but not the condition.
    not_done = (iteration < dimension)
    not_converged = standard_ops.reduce_any(
        standard_ops.not_equal(inactive, old_inactive))
    return standard_ops.logical_and(not_done, not_converged)

  def while_loop_body(iteration, matrix, inactive, old_inactive):
    """Performs one iteration of the projection."""
    del old_inactive  # Needed by the condition, but not the body.
    iteration += 1
    scale = (1.0 - standard_ops.reduce_sum(
        matrix, axis=0, keepdims=True)) / standard_ops.maximum(
            1.0, standard_ops.reduce_sum(inactive, axis=0, keepdims=True))
    matrix += scale * inactive
    new_inactive = standard_ops.cast(matrix > 0, matrix.dtype)
    matrix *= new_inactive
    return (iteration, matrix, new_inactive, inactive)

  iteration = standard_ops.constant(0)
  inactive = standard_ops.ones_like(matrix, dtype=matrix.dtype)

  # We actually want a do-while loop, so we explicitly call while_loop_body()
  # once before tf.while_loop().
  iteration, matrix, inactive, old_inactive = while_loop_body(
      iteration, matrix, inactive, inactive)
  iteration, matrix, inactive, old_inactive = control_flow_ops.while_loop(
      while_loop_condition,
      while_loop_body,
      loop_vars=(iteration, matrix, inactive, old_inactive),
      name="euclidean_projection")

  return matrix
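# The same fixed-point iteration, sketched column-wise in NumPy (the name
# `project_stochastic_matrix` is illustrative, not part of the library).
# Unlike the multiplier projection, the per-column shift here can be positive
# or negative, since each column must sum to exactly one rather than to at
# most `radius`.
import numpy as np

def project_stochastic_matrix(matrix):
  """Hypothetical NumPy analogue of _project_stochastic_matrix_wrt_euclidean_norm()."""
  matrix = np.array(matrix, dtype=float)
  inactive = np.ones_like(matrix)
  for _ in range(matrix.shape[0] + 1):
    old_inactive = inactive
    scale = ((1.0 - matrix.sum(axis=0, keepdims=True)) /
             np.maximum(1.0, inactive.sum(axis=0, keepdims=True)))
    matrix += scale * inactive
    inactive = (matrix > 0).astype(float)
    matrix *= inactive
    if np.array_equal(inactive, old_inactive):
      break
  return matrix

# Reproduces the unit test above:
# project_stochastic_matrix([[-0.1, -0.1, 0.4],
#                            [-0.8, 0.4, 1.2],
#                            [-0.3, 0.1, 0.2]])
#   -> [[0.6, 0.1, 0.1], [0.0, 0.6, 0.9], [0.4, 0.3, 0.0]]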