def _test_matmul(self, with_batch):
  for use_placeholder in self._use_placeholder_options:
    for build_info in self._operator_build_infos:
      # If batch dimensions are omitted, but there are no batch dimensions
      # for the linear operator, then skip the test case. This is already
      # checked with with_batch=True.
      if not with_batch and len(build_info.shape) <= 2:
        continue
      for dtype in self._dtypes_to_test:
        for adjoint in self._adjoint_options:
          for adjoint_arg in self._adjoint_arg_options:
            with self.test_session(graph=ops.Graph()) as sess:
              sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
              operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                  build_info, dtype, use_placeholder=use_placeholder)
              x = self._make_x(
                  operator, adjoint=adjoint, with_batch=with_batch)
              # If adjoint_arg, compute A X^H^H = A X.
              if adjoint_arg:
                op_matmul = operator.matmul(
                    linalg.adjoint(x),
                    adjoint=adjoint,
                    adjoint_arg=adjoint_arg)
              else:
                op_matmul = operator.matmul(x, adjoint=adjoint)
              mat_matmul = linear_operator_util.matmul_with_broadcast(
                  mat, x, adjoint_a=adjoint)
              if not use_placeholder:
                self.assertAllEqual(op_matmul.get_shape(),
                                    mat_matmul.get_shape())
              op_matmul_v, mat_matmul_v = sess.run(
                  [op_matmul, mat_matmul], feed_dict=feed_dict)
              self.assertAC(op_matmul_v, mat_matmul_v)
def _test_matmul(self, with_batch):
  for use_placeholder in self._use_placeholder_options:
    for build_info in self._operator_build_infos:
      # If batch dimensions are omitted, but there are no batch dimensions
      # for the linear operator, then skip the test case. This is already
      # checked with with_batch=True.
      if not with_batch and len(build_info.shape) <= 2:
        continue
      for dtype in self._dtypes_to_test:
        for adjoint in self._adjoint_options:
          for adjoint_arg in self._adjoint_arg_options:
            with self.session(graph=ops.Graph()) as sess:
              sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
              operator, mat = self._operator_and_matrix(
                  build_info, dtype, use_placeholder=use_placeholder)
              x = self._make_x(
                  operator, adjoint=adjoint, with_batch=with_batch)
              # If adjoint_arg, compute A X^H^H = A X.
              if adjoint_arg:
                op_matmul = operator.matmul(
                    linalg.adjoint(x),
                    adjoint=adjoint,
                    adjoint_arg=adjoint_arg)
              else:
                op_matmul = operator.matmul(x, adjoint=adjoint)
              mat_matmul = linear_operator_util.matmul_with_broadcast(
                  mat, x, adjoint_a=adjoint)
              if not use_placeholder:
                self.assertAllEqual(op_matmul.get_shape(),
                                    mat_matmul.get_shape())
              op_matmul_v, mat_matmul_v = sess.run(
                  [op_matmul, mat_matmul])
              self.assertAC(op_matmul_v, mat_matmul_v)
def _test_matmul(self, with_batch):
  for use_placeholder in self._use_placeholder_options:
    for build_info in self._operator_build_infos:
      for dtype in self._dtypes_to_test:
        for adjoint in self._adjoint_options:
          for adjoint_arg in self._adjoint_arg_options:
            with self.test_session(graph=ops.Graph()) as sess:
              sess.graph.seed = random_seed.DEFAULT_GRAPH_SEED
              operator, mat, feed_dict = self._operator_and_mat_and_feed_dict(
                  build_info, dtype, use_placeholder=use_placeholder)
              x = self._make_x(
                  operator, adjoint=adjoint, with_batch=with_batch)
              # If adjoint_arg, compute A X^H^H = A X.
              if adjoint_arg:
                op_matmul = operator.matmul(
                    linalg.adjoint(x),
                    adjoint=adjoint,
                    adjoint_arg=adjoint_arg)
              else:
                op_matmul = operator.matmul(x, adjoint=adjoint)
              mat_matmul = linear_operator_util.matmul_with_broadcast(
                  mat, x, adjoint_a=adjoint)
              if not use_placeholder:
                self.assertAllEqual(op_matmul.get_shape(),
                                    mat_matmul.get_shape())
              op_matmul_v, mat_matmul_v = sess.run(
                  [op_matmul, mat_matmul], feed_dict=feed_dict)
              self.assertAC(op_matmul_v, mat_matmul_v)
def test_static_dims_broadcast_y_has_extra_dims_transpose_dynamic(self):
  # Since the second arg has extra dims, and the domain dim of the first arg
  # is larger than the number of linear equations, code will "flip" the extra
  # dims of the first arg to the far right, making extra linear equations
  # (then call the matrix function, then flip back).
  # We have verified that this optimization indeed happens. How? We stepped
  # through with a debugger.
  x = rng.rand(1, 7, 5)
  y = rng.rand(2, 3, 1, 7)
  x_broadcast = x + np.zeros((2, 3, 1, 1))

  x_ph = array_ops.placeholder(dtypes.float64, [None, None, None])
  y_ph = array_ops.placeholder(dtypes.float64, [None, None, None, None])

  with self.cached_session():
    result = linear_operator_util.matmul_with_broadcast(
        x_ph, y_ph, transpose_a=True, transpose_b=True)
    self.assertAllEqual(4, result.shape.ndims)
    expected = math_ops.matmul(
        x_broadcast, y, transpose_a=True, transpose_b=True)
    self.assertAllClose(expected.eval(),
                        result.eval(feed_dict={x_ph: x, y_ph: y}))
def test_basic_statistics_no_latent_variance(self):
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  drift_scale = 0.

  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  initial_state_loc = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_features])))
  initial_state_scale = tf.zeros_like(initial_state_loc)
  initial_state_prior = tfd.MultivariateNormalDiag(
      loc=initial_state_loc, scale_diag=initial_state_scale)

  ssm = DynamicLinearRegressionStateSpaceModel(
      num_timesteps=num_timesteps,
      design_matrix=design_matrix,
      drift_scale=drift_scale,
      initial_state_prior=initial_state_prior)

  predicted_time_series = linear_operator_util.matmul_with_broadcast(
      design_matrix, initial_state_loc[..., tf.newaxis])

  self.assertAllEqual(self.evaluate(ssm.mean()), predicted_time_series)
  self.assertAllEqual(
      *self.evaluate((ssm.stddev(), tf.zeros_like(predicted_time_series))))
def operator_and_matrix(
    self, build_info, dtype, use_placeholder,
    ensure_self_adjoint_and_pd=False):
  shape = list(build_info.shape)
  reflection_axis = linear_operator_test_util.random_sign_uniform(
      shape[:-1], minval=1., maxval=2., dtype=dtype)
  # Make sure unit norm.
  reflection_axis = reflection_axis / linalg_ops.norm(
      reflection_axis, axis=-1, keepdims=True)

  lin_op_reflection_axis = reflection_axis

  if use_placeholder:
    lin_op_reflection_axis = array_ops.placeholder_with_default(
        reflection_axis, shape=None)

  operator = householder.LinearOperatorHouseholder(lin_op_reflection_axis)

  mat = reflection_axis[..., array_ops.newaxis]
  matrix = -2 * linear_operator_util.matmul_with_broadcast(
      mat, mat, adjoint_b=True)
  matrix = array_ops.matrix_set_diag(
      matrix, 1. + array_ops.matrix_diag_part(matrix))

  return operator, matrix
def _to_dense(self):
  normalized_axis = self.reflection_axis / linalg.norm(
      self.reflection_axis, axis=-1, keepdims=True)
  mat = normalized_axis[..., array_ops.newaxis]
  matrix = -2 * linear_operator_util.matmul_with_broadcast(
      mat, mat, adjoint_b=True)
  return array_ops.matrix_set_diag(
      matrix, 1. + array_ops.matrix_diag_part(matrix))
def _solve(self, rhs, adjoint=False, adjoint_arg=False):
  if self.base_operator.is_non_singular is False:
    raise ValueError(
        "Solve not implemented unless this is a perturbation of a "
        "non-singular LinearOperator.")
  # The Woodbury formula gives:
  # https://en.wikipedia.org/wiki/Woodbury_matrix_identity
  #   (L + UDV^H)^{-1}
  #   = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}
  #   = L^{-1} - L^{-1} U C^{-1} V^H L^{-1}
  # where C is the capacitance matrix, C := D^{-1} + V^H L^{-1} U.
  # Note also that, with ^{-H} being the inverse of the adjoint,
  #   (L + UDV^H)^{-H}
  #   = L^{-H} - L^{-H} V C^{-H} U^H L^{-H}
  l = self.base_operator
  if adjoint:
    v = self.u
    u = self.v
  else:
    v = self.v
    u = self.u

  # L^{-1} rhs
  linv_rhs = l.solve(rhs, adjoint=adjoint, adjoint_arg=adjoint_arg)
  # V^H L^{-1} rhs
  vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
      v, linv_rhs, adjoint_a=True)
  # C^{-1} V^H L^{-1} rhs
  if self._use_cholesky:
    capinv_vh_linv_rhs = linear_operator_util.cholesky_solve_with_broadcast(
        self._chol_capacitance, vh_linv_rhs)
  else:
    capinv_vh_linv_rhs = linear_operator_util.matrix_solve_with_broadcast(
        self._capacitance, vh_linv_rhs, adjoint=adjoint)
  # U C^{-1} V^H L^{-1} rhs
  u_capinv_vh_linv_rhs = linear_operator_util.matmul_with_broadcast(
      u, capinv_vh_linv_rhs)
  # L^{-1} U C^{-1} V^H L^{-1} rhs
  linv_u_capinv_vh_linv_rhs = l.solve(u_capinv_vh_linv_rhs, adjoint=adjoint)

  # L^{-1} rhs - L^{-1} U C^{-1} V^H L^{-1} rhs
  return linv_rhs - linv_u_capinv_vh_linv_rhs
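# A minimal NumPy sketch (not library code) checking the Woodbury identity
# that the `_solve` above relies on:
#   (L + U D V^H)^{-1} = L^{-1} - L^{-1} U (D^{-1} + V^H L^{-1} U)^{-1} V^H L^{-1}.
# The names L, U, D, V below are illustrative assumptions for real matrices.
import numpy as np

rs = np.random.RandomState(0)
n, k = 5, 2
l_mat = np.eye(n) + 0.1 * rs.randn(n, n)   # well-conditioned base operator L
u = rs.randn(n, k)
v = rs.randn(n, k)
d = np.diag(rs.rand(k) + 1.)

lhs = np.linalg.inv(l_mat + u @ d @ v.T)
# Capacitance matrix C = D^{-1} + V^T L^{-1} U.
capacitance = np.linalg.inv(d) + v.T @ np.linalg.solve(l_mat, u)
rhs = np.linalg.inv(l_mat) - np.linalg.solve(l_mat, u) @ np.linalg.solve(
    capacitance, v.T @ np.linalg.inv(l_mat))
np.testing.assert_allclose(lhs, rhs, atol=1e-9)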
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  u = self.u
  v = self.v
  l = self.base_operator
  d = self.diag_operator

  leading_term = l.matmul(x, adjoint=adjoint, adjoint_arg=adjoint_arg)

  if adjoint:
    uh_x = linear_operator_util.matmul_with_broadcast(
        u, x, adjoint_a=True, adjoint_b=adjoint_arg)
    d_uh_x = d.matmul(uh_x, adjoint=adjoint)
    v_d_uh_x = linear_operator_util.matmul_with_broadcast(v, d_uh_x)
    return leading_term + v_d_uh_x
  else:
    vh_x = linear_operator_util.matmul_with_broadcast(
        v, x, adjoint_a=True, adjoint_b=adjoint_arg)
    d_vh_x = d.matmul(vh_x, adjoint=adjoint)
    u_d_vh_x = linear_operator_util.matmul_with_broadcast(u, d_vh_x)
    return leading_term + u_d_vh_x
def test_static_dims_broadcast(self):
  # batch_shape = [2]
  # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
  x = rng.rand(2, 1, 3)
  y = rng.rand(3, 7)
  y_broadcast = y + np.zeros((2, 1, 1))

  with self.cached_session():
    result = linear_operator_util.matmul_with_broadcast(x, y)
    self.assertAllEqual((2, 1, 7), result.get_shape())
    expected = math_ops.matmul(x, y_broadcast)
    self.assertAllEqual(expected.eval(), result.eval())
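# Side note (illustrative only, not library code): NumPy's matmul broadcasts
# leading batch dimensions in the same way, which is the behavior that
# matmul_with_broadcast provides for Tensors with unequal batch shapes.
import numpy as np

rs = np.random.RandomState(0)
x = rs.rand(2, 1, 3)   # batch of two 1x3 matrices
y = rs.rand(3, 7)      # a single 3x7 matrix, broadcast across the batch
assert np.matmul(x, y).shape == (2, 1, 7)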
def test_simple_regression_correctness(self):
  # Verify that optimizing a simple linear regression by gradient descent
  # recovers the known-correct weights.
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  true_weights = self._build_placeholder([4., -3.])
  predicted_time_series = linear_operator_util.matmul_with_broadcast(
      design_matrix, true_weights[..., tf.newaxis])

  linear_regression = LinearRegression(
      design_matrix=design_matrix,
      weights_prior=tfd.Independent(
          tfd.Cauchy(
              loc=self._build_placeholder(np.zeros([num_features])),
              scale=self._build_placeholder(np.ones([num_features]))),
          reinterpreted_batch_ndims=1))
  observation_noise_scale_prior = tfd.LogNormal(
      loc=self._build_placeholder(-2), scale=self._build_placeholder(0.1))
  model = Sum(
      components=[linear_regression],
      observation_noise_scale_prior=observation_noise_scale_prior)

  learnable_weights = tf.compat.v2.Variable(
      tf.zeros([num_features], dtype=true_weights.dtype))

  def build_loss():
    learnable_ssm = model.make_state_space_model(
        num_timesteps=num_timesteps,
        param_vals={
            "LinearRegression/_weights": learnable_weights,
            "observation_noise_scale": observation_noise_scale_prior.mode()
        })
    return -learnable_ssm.log_prob(predicted_time_series)

  # We provide graph- and eager-mode optimization for TF 2.0 compatibility.
  num_train_steps = 80
  optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=0.1)
  if tf.executing_eagerly():
    for _ in range(num_train_steps):
      optimizer.minimize(build_loss)
  else:
    train_op = optimizer.minimize(build_loss())
    self.evaluate(tf.compat.v1.global_variables_initializer())
    for _ in range(num_train_steps):
      _ = self.evaluate(train_op)
  self.assertAllClose(*self.evaluate((true_weights, learnable_weights)),
                      atol=0.2)
def _make_capacitance(self):
  # C := D^{-1} + V^H L^{-1} U
  # which is sometimes known as the "capacitance" matrix.

  # L^{-1} U
  linv_u = self.base_operator.solve(self.u)
  # V^H L^{-1} U
  vh_linv_u = linear_operator_util.matmul_with_broadcast(
      self.v, linv_u, adjoint_a=True)
  # D^{-1} + V^H L^{-1} U
  capacitance = self._diag_inv_operator.add_to_tensor(vh_linv_u)
  return capacitance
def test_dynamic_dims_broadcast_32bit(self):
  # batch_shape = [2]
  # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
  x = rng.rand(2, 1, 3)
  y = rng.rand(3, 7)
  y_broadcast = y + np.zeros((2, 1, 1))

  x_ph = array_ops.placeholder(dtypes.float64)
  y_ph = array_ops.placeholder(dtypes.float64)

  with self.test_session() as sess:
    result, expected = sess.run(
        [linear_operator_util.matmul_with_broadcast(x_ph, y_ph),
         math_ops.matmul(x, y_broadcast)],
        feed_dict={x_ph: x, y_ph: y})
    self.assertAllEqual(expected, result)
def test_static_dims_broadcast_y_has_extra_dims(self):
  # Since the second arg has extra dims, and the domain dim of the first arg
  # is larger than the number of linear equations, code will "flip" the extra
  # dims of the first arg to the far right, making extra linear equations
  # (then call the matrix function, then flip back).
  # We have verified that this optimization indeed happens. How? We stepped
  # through with a debugger.
  x = rng.rand(5, 7)
  y = rng.rand(2, 3, 7, 5)
  x_broadcast = x + np.zeros((2, 3, 5, 7))

  with self.cached_session():
    result = linear_operator_util.matmul_with_broadcast(x, y)
    self.assertAllEqual((2, 3, 5, 5), result.get_shape())
    expected = math_ops.matmul(x_broadcast, y)
    self.assertAllClose(expected.eval(), result.eval())
def test_static_dims_broadcast_y_has_extra_dims(self):
  # Since the second arg has extra dims, and the domain dim of the first arg
  # is larger than the number of linear equations, code will "flip" the extra
  # dims of the first arg to the far right, making extra linear equations
  # (then call the matrix function, then flip back).
  # We have verified that this optimization indeed happens. How? We stepped
  # through with a debugger.
  x = rng.rand(5, 7)
  y = rng.rand(2, 3, 7, 5)
  x_broadcast = x + np.zeros((2, 3, 5, 7))

  with self.cached_session():
    result = linear_operator_util.matmul_with_broadcast(x, y)
    self.assertAllEqual((2, 3, 5, 5), result.get_shape())
    expected = math_ops.matmul(x_broadcast, y)
    self.assertAllClose(expected.eval(), self.evaluate(result))
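# Purely illustrative NumPy sketch of the "flip" idea described in the
# comments above: when only `y` carries extra batch dims, those dims can be
# folded into extra columns so a single 2-D matmul suffices, then unfolded.
# This is only the shape bookkeeping, not the library's actual implementation.
import numpy as np

rs = np.random.RandomState(0)
x = rs.rand(5, 7)
y = rs.rand(2, 3, 7, 5)

# Fold y's batch dims [2, 3] into columns: (7, 2*3*5).
y_folded = np.transpose(y, [2, 0, 1, 3]).reshape(7, -1)
z = x @ y_folded                                       # (5, 30)
# Unfold back to the broadcast result shape (2, 3, 5, 5).
result = np.transpose(z.reshape(5, 2, 3, 5), [1, 2, 0, 3])

expected = np.matmul(x + np.zeros((2, 3, 1, 1)), y)
np.testing.assert_allclose(result, expected)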
def benchmarkBatchMatMulBroadcast(self):
  for (a_shape, b_shape) in self.shape_pairs:
    with compat.forward_compatibility_horizon(2019, 4, 26):
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix_a = variables.Variable(
            GetRandomNormalInput(a_shape, np.float32))
        matrix_b = variables.Variable(
            GetRandomNormalInput(b_shape, np.float32))
        variables.global_variables_initializer().run()

        # Use batch matmul op's internal broadcasting.
        self.run_op_benchmark(
            sess,
            math_ops.matmul(matrix_a, matrix_b),
            min_iters=50,
            name="batch_matmul_cpu_{}_{}".format(a_shape, b_shape))

        # Manually broadcast the input matrices using the broadcast_to op.
        broadcasted_batch_shape = array_ops.broadcast_static_shape(
            matrix_a.shape[:-2], matrix_b.shape[:-2])
        broadcasted_a_shape = broadcasted_batch_shape.concatenate(
            matrix_a.shape[-2:])
        broadcasted_b_shape = broadcasted_batch_shape.concatenate(
            matrix_b.shape[-2:])
        self.run_op_benchmark(
            sess,
            math_ops.matmul(
                array_ops.broadcast_to(matrix_a, broadcasted_a_shape),
                array_ops.broadcast_to(matrix_b, broadcasted_b_shape)),
            min_iters=50,
            name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                a_shape, b_shape))

        # Use linear_operator_util.matmul_with_broadcast.
        name_template = (
            "batch_matmul_manual_broadcast_with_linear_operator_util"
            "_cpu_{}_{}")
        self.run_op_benchmark(
            sess,
            linear_operator_util.matmul_with_broadcast(matrix_a, matrix_b),
            min_iters=50,
            name=name_template.format(a_shape, b_shape))
def benchmarkBatchMatMulBroadcast(self):
  for (a_shape, b_shape) in self.shape_pairs:
    with compat.forward_compatibility_horizon(2019, 4, 19):
      with ops.Graph().as_default(), \
          session.Session(config=benchmark.benchmark_config()) as sess, \
          ops.device("/cpu:0"):
        matrix_a = variables.Variable(
            GetRandomNormalInput(a_shape, np.float32))
        matrix_b = variables.Variable(
            GetRandomNormalInput(b_shape, np.float32))
        variables.global_variables_initializer().run()

        # Use batch matmul op's internal broadcasting.
        self.run_op_benchmark(
            sess,
            math_ops.matmul(matrix_a, matrix_b),
            min_iters=50,
            name="batch_matmul_cpu_{}_{}".format(a_shape, b_shape))

        # Manually broadcast the input matrices using the broadcast_to op.
        broadcasted_batch_shape = array_ops.broadcast_static_shape(
            matrix_a.shape[:-2], matrix_b.shape[:-2])
        broadcasted_a_shape = broadcasted_batch_shape.concatenate(
            matrix_a.shape[-2:])
        broadcasted_b_shape = broadcasted_batch_shape.concatenate(
            matrix_b.shape[-2:])
        self.run_op_benchmark(
            sess,
            math_ops.matmul(
                array_ops.broadcast_to(matrix_a, broadcasted_a_shape),
                array_ops.broadcast_to(matrix_b, broadcasted_b_shape)),
            min_iters=50,
            name="batch_matmul_manual_broadcast_cpu_{}_{}".format(
                a_shape, b_shape))

        # Use linear_operator_util.matmul_with_broadcast.
        name_template = (
            "batch_matmul_manual_broadcast_with_linear_operator_util"
            "_cpu_{}_{}")
        self.run_op_benchmark(
            sess,
            linear_operator_util.matmul_with_broadcast(matrix_a, matrix_b),
            min_iters=50,
            name=name_template.format(a_shape, b_shape))
def test_simple_regression_correctness(self):
  # Verify that optimizing a simple linear regression by gradient descent
  # recovers the known-correct weights.
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))

  true_weights = self._build_placeholder([4., -3.])
  predicted_time_series = linear_operator_util.matmul_with_broadcast(
      design_matrix, true_weights[..., tf.newaxis])

  linear_regression = LinearRegression(
      design_matrix=design_matrix,
      weights_prior=tfd.Independent(
          tfd.Cauchy(
              loc=self._build_placeholder(np.zeros([num_features])),
              scale=self._build_placeholder(np.ones([num_features]))),
          reinterpreted_batch_ndims=1))
  observation_noise_scale_prior = tfd.LogNormal(
      loc=self._build_placeholder(-2), scale=self._build_placeholder(0.1))
  model = Sum(
      components=[linear_regression],
      observation_noise_scale_prior=observation_noise_scale_prior)

  learnable_weights = tf.Variable(
      tf.zeros([num_features], dtype=true_weights.dtype))
  learnable_ssm = model.make_state_space_model(
      num_timesteps=num_timesteps,
      param_vals={
          "LinearRegression/_weights": learnable_weights,
          "observation_noise_scale": observation_noise_scale_prior.mode()
      })

  loss = -learnable_ssm.log_prob(predicted_time_series)
  train_op = tf.train.AdamOptimizer(0.1).minimize(loss)
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(80):
      _ = sess.run(train_op)
    self.assertAllClose(*sess.run((true_weights, learnable_weights)),
                        atol=0.2)
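# Illustrative-only NumPy analogue of the idea behind the correctness tests
# above: with noise-free observations y = X @ w_true, ordinary least squares
# recovers the true regression weights exactly (up to float error). The names
# here are hypothetical and unrelated to the library code.
import numpy as np

rs = np.random.RandomState(1)
X = rs.randn(10, 2)            # design matrix: 10 timesteps, 2 features
w_true = np.array([4., -3.])
y = X @ w_true                 # noise-free predicted time series
w_hat, *_ = np.linalg.lstsq(X, y, rcond=None)
np.testing.assert_allclose(w_hat, w_true, atol=1e-10)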
def test_dynamic_dims_broadcast_64bit(self):
  # batch_shape = [2]
  # for each batch member, we have a 1x3 matrix times a 3x7 matrix ==> 1x7
  x = rng.rand(2, 1, 3)
  y = rng.rand(3, 7)
  y_broadcast = y + np.zeros((2, 1, 1))

  x_ph = array_ops.placeholder(dtypes.float64)
  y_ph = array_ops.placeholder(dtypes.float64)

  with self.cached_session() as sess:
    result, expected = sess.run(
        [linear_operator_util.matmul_with_broadcast(x_ph, y_ph),
         math_ops.matmul(x, y_broadcast)],
        feed_dict={x_ph: x, y_ph: y})
    self.assertAllClose(expected, result)
def test_basic_statistics(self):
  # Verify that this model constructs a distribution with mean
  # `matmul(design_matrix, weights)` and stddev 0.
  batch_shape = [4, 3]
  num_timesteps = 10
  num_features = 2
  design_matrix = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_timesteps, num_features])))
  linear_regression = LinearRegression(design_matrix=design_matrix)
  true_weights = self._build_placeholder(
      np.random.randn(*(batch_shape + [num_features])))
  predicted_time_series = linear_operator_util.matmul_with_broadcast(
      design_matrix, true_weights[..., tf.newaxis])

  ssm = linear_regression.make_state_space_model(
      num_timesteps=num_timesteps, param_vals={"weights": true_weights})
  self.assertAllEqual(self.evaluate(ssm.mean()), predicted_time_series)
  self.assertAllEqual(
      *self.evaluate((ssm.stddev(), tf.zeros_like(predicted_time_series))))
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  # Given a vector `v`, we would like to reflect `x` about the hyperplane
  # orthogonal to `v` going through the origin. We first project `x` onto `v`
  # to get v * dot(v, x) / dot(v, v). After we project, we can reflect the
  # projection about the hyperplane by flipping sign to get
  # -v * dot(v, x) / dot(v, v). Finally, we can add back the component that
  # is orthogonal to v. This is invariant under reflection, since the whole
  # hyperplane is invariant. This component is equal to
  # x - v * dot(v, x) / dot(v, v), giving the formula
  # x - 2 * v * dot(v, x) / dot(v, v) for the reflection.
  # Note that because this is a reflection, it lies in O(n) (for real vector
  # spaces) or U(n) (for complex vector spaces), and thus is its own adjoint.
  x = linalg.adjoint(x) if adjoint_arg else x
  normalized_axis = self.reflection_axis / linalg.norm(
      self.reflection_axis, axis=-1, keepdims=True)
  mat = normalized_axis[..., array_ops.newaxis]
  x_dot_normalized_v = linear_operator_util.matmul_with_broadcast(
      mat, x, adjoint_a=True)
  return x - 2 * mat * x_dot_normalized_v
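# Quick NumPy check (illustrative only, not library code) of the claim in the
# comment above: the Householder matrix H = I - 2 v v^T / (v^T v) is symmetric
# (its own adjoint) and an involution (its own inverse), so it is orthogonal.
import numpy as np

rs = np.random.RandomState(42)
v = rs.randn(4)
H = np.eye(4) - 2.0 * np.outer(v, v) / np.dot(v, v)

np.testing.assert_allclose(H, H.T)                        # self-adjoint
np.testing.assert_allclose(H @ H, np.eye(4), atol=1e-12)  # H is its own inverse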
def _forward(self, x):
  return self._Q_operator.matvec(
      linalg_util.matmul_with_broadcast(
          self._R, x[..., tf.newaxis])[..., 0])
def _matmul(self, x, adjoint=False, adjoint_arg=False):
  return linear_operator_util.matmul_with_broadcast(
      self._tril, x, adjoint_a=adjoint, adjoint_b=adjoint_arg)
def _forward(self, x):
  w = lu_reconstruct(
      lower_upper=self.lower_upper,
      perm=self.permutation,
      validate_args=self.validate_args)
  return linear_operator_util.matmul_with_broadcast(
      w, x[..., tf.newaxis])[..., 0]
def _matmul_right(self, x, adjoint=False, adjoint_arg=False):
  return lou.matmul_with_broadcast(
      x, self._matrix, adjoint_a=adjoint_arg, adjoint_b=adjoint)