def tag_solve_triangular(fgraph, node):
    """Swap a general solve on a Cholesky factor for a structured solve.

    The rewrite fires when ``node`` applies a ``Solve`` op whose ``assume_a``
    is ``"gen"`` and whose first input ``A`` is either

    * the direct output of a ``Cholesky`` op, or
    * a ``DimShuffle`` with ``new_order == (1, 0)`` of such an output.

    In both cases the replacement is ``Solve(assume_a="sym", lower=...)``
    applied to the same ``(A, b)`` inputs.  ``lower`` mirrors the Cholesky
    op's ``lower`` attribute in the direct case and is inverted in the
    ``DimShuffle`` case.

    Parameters
    ----------
    fgraph
        Not used by this function.
    node
        The apply node being inspected.

    Returns
    -------
    list or None
        A one-element list holding the replacement output, or ``None`` when
        the node does not match.
    """
    if not isinstance(node.op, Solve):
        return None
    if node.op.assume_a != "gen":
        return None

    # The node computes the solution ``x`` of ``A x = b``.
    a_mat, rhs = node.inputs
    producer = a_mat.owner
    if not producer:
        return None

    # NOTE(review): a Cholesky factor is triangular rather than symmetric;
    # confirm that ``assume_a="sym"`` is the structure hint intended here.
    if isinstance(producer.op, Cholesky):
        use_lower = bool(producer.op.lower)
        return [Solve(assume_a="sym", lower=use_lower)(a_mat, rhs)]

    if isinstance(producer.op, DimShuffle) and producer.op.new_order == (1, 0):
        (untransposed,) = producer.inputs
        factor_node = untransposed.owner
        if factor_node and isinstance(factor_node.op, Cholesky):
            # The (1, 0) shuffle swaps the two axes, so the triangle flips.
            use_lower = not factor_node.op.lower
            return [Solve(assume_a="sym", lower=use_lower)(a_mat, rhs)]

    return None
def tag_solve_triangular(fgraph, node):
    """Swap a general solve on a Cholesky factor for a triangular solve.

    The rewrite fires when ``node.op`` compares equal to ``solve``, its
    ``A_structure`` is ``"general"``, and the first input ``A`` is either

    * the direct output of an op of the same type as ``cholesky``, or
    * a ``DimShuffle`` with ``new_order == (1, 0)`` of such an output.

    The replacement is ``Solve("lower_triangular")`` or
    ``Solve("upper_triangular")`` applied to the same ``(A, b)`` inputs.
    The triangle follows the Cholesky op's ``lower`` attribute in the direct
    case and is flipped in the ``DimShuffle`` case.

    Parameters
    ----------
    fgraph
        Not used by this function.
    node
        The apply node being inspected.

    Returns
    -------
    list or None
        A one-element list holding the replacement output, or ``None`` when
        the node does not match.
    """
    if not (node.op == solve):
        return None
    if not (node.op.A_structure == "general"):
        return None

    # The node computes the solution ``x`` of ``A x = b``.
    a_mat, rhs = node.inputs
    producer = a_mat.owner
    if not producer:
        return None

    cholesky_type = type(cholesky)

    if isinstance(producer.op, cholesky_type):
        structure = "lower_triangular" if producer.op.lower else "upper_triangular"
        return [Solve(structure)(a_mat, rhs)]

    if isinstance(producer.op, DimShuffle) and producer.op.new_order == (1, 0):
        (untransposed,) = producer.inputs
        factor_node = untransposed.owner
        if factor_node and isinstance(factor_node.op, cholesky_type):
            # The (1, 0) shuffle swaps the two axes, so the triangle flips.
            structure = (
                "upper_triangular" if factor_node.op.lower else "lower_triangular"
            )
            return [Solve(structure)(a_mat, rhs)]

    return None
def psd_solve_with_chol(fgraph, node):
    """Rewrite a solve on a PSD matrix as two triangular solves.

    When ``node.op`` compares equal to ``solve`` and ``is_psd(A)`` holds for
    the first input, ``A`` is factored with ``cholesky`` and ``A x = b`` is
    solved by a lower-triangular solve against ``L`` followed by an
    upper-triangular solve against ``L.T``.

    Parameters
    ----------
    fgraph
        Not used by this function.
    node
        The apply node being inspected.

    Returns
    -------
    list or None
        A one-element list holding the replacement output, or ``None`` when
        the node does not match.
    """
    if not (node.op == solve):
        return None

    # The node computes the solution ``x`` of ``A x = b``.
    a_mat, rhs = node.inputs
    if not is_psd(a_mat):
        return None

    factor = cholesky(a_mat)
    # N.B. this can be further reduced to a yet-unwritten cho_solve Op
    #     __if__ no other Op makes use of the L matrix during the
    #     stabilization
    forward = Solve("lower_triangular")(factor, rhs)
    solution = Solve("upper_triangular")(factor.T, forward)
    return [solution]
def psd_solve_with_chol(fgraph, node):
    """Rewrite a solve on a matrix tagged as PSD into two chained solves.

    This relies on a boolean ``psd`` tag on matrices: the rewrite only fires
    when ``node.op`` is a ``Solve`` instance and ``A.tag.psd`` is exactly
    ``True``.  ``A`` is then factored with ``cholesky`` and ``A x = b`` is
    solved first against ``L`` (``lower=True``) and then against ``L.T``
    (``lower=False``).

    Parameters
    ----------
    fgraph
        Not used by this function.
    node
        The apply node being inspected.

    Returns
    -------
    list or None
        A one-element list holding the replacement output, or ``None`` when
        the node does not match.
    """
    if not isinstance(node.op, Solve):
        return None

    # The node computes the solution ``x`` of ``A x = b``.
    a_mat, rhs = node.inputs
    if getattr(a_mat.tag, "psd", None) is not True:
        return None

    factor = cholesky(a_mat)
    # N.B. this can be further reduced to a yet-unwritten cho_solve Op
    #     __if__ no other Op makes use of the L matrix during the
    #     stabilization
    # NOTE(review): ``factor`` is a triangular Cholesky factor; confirm that
    # ``assume_a="sym"`` is the structure hint intended for these solves.
    forward = Solve(assume_a="sym", lower=True)(factor, rhs)
    solution = Solve(assume_a="sym", lower=False)(factor.T, forward)
    return [solution]
def verify_solve_grad(self, m, n, A_structure, lower, rng):
    """Check the gradient of ``Solve`` on a random, well-conditioned system.

    Parameters
    ----------
    m
        Size of the square matrix ``A`` (``m`` x ``m``).
    n
        Number of right-hand-side columns; ``None`` makes ``b`` a length-``m``
        vector instead of an ``m`` x ``n`` matrix.
    A_structure
        Structure string passed to ``Solve``.  ``"lower_triangular"`` and
        ``"upper_triangular"`` additionally cause ``A`` to be masked with
        ``np.tril`` / ``np.triu``.
    lower
        Forwarded to ``Solve`` as its ``lower`` argument.
    rng
        Random generator used to draw ``A`` and ``b`` and handed on to
        ``utt.verify_grad``.
    """
    dtype = config.floatX

    # Adding the identity keeps the diagonal of A relatively large, which
    # avoids numerical precision issues.
    A_val = (rng.normal(size=(m, m)) * 0.5 + np.eye(m)).astype(dtype)
    if A_structure == "lower_triangular":
        A_val = np.tril(A_val)
    elif A_structure == "upper_triangular":
        A_val = np.triu(A_val)

    b_shape = m if n is None else (m, n)
    b_val = rng.normal(size=b_shape).astype(dtype)

    # Only float64 gets an explicit finite-difference step.
    eps = 2e-8 if config.floatX == "float64" else None

    op_under_test = Solve(A_structure=A_structure, lower=lower)
    utt.verify_grad(op_under_test, [A_val, b_val], 3, rng, eps=eps)
def test_solve_grad(self, m, n, assume_a, lower):
    """Check the gradient of ``Solve`` on a random, well-conditioned system.

    Parameters
    ----------
    m
        Size of the square matrix ``A`` (``m`` x ``m``).
    n
        Number of right-hand-side columns; ``None`` makes ``b`` a length-``m``
        vector instead of an ``m`` x ``n`` matrix.
    assume_a
        Forwarded to ``Solve`` as its ``assume_a`` argument.
    lower
        Forwarded to ``Solve`` as its ``lower`` argument.
    """
    rng = np.random.default_rng(utt.fetch_seed())
    dtype = config.floatX

    # Adding the identity keeps the diagonal of `A` relatively large, which
    # avoids numerical precision issues.
    A_val = (rng.normal(size=(m, m)) * 0.5 + np.eye(m)).astype(dtype)

    b_shape = m if n is None else (m, n)
    b_val = rng.normal(size=b_shape).astype(dtype)

    # Only float64 gets an explicit finite-difference step.
    eps = 2e-8 if config.floatX == "float64" else None

    op_under_test = Solve(assume_a=assume_a, lower=lower)
    utt.verify_grad(op_under_test, [A_val, b_val], 3, rng, eps=eps)
def MvNormalLogp():
    """Build an Op computing the log pdf of a multivariate normal distribution.

    This should be used in MvNormal.logp once Theano#5908 is released.

    The returned ``OpFromGraph`` (created with ``inline=True``) takes the two
    inputs below and uses a hand-written gradient (``grad_overrides``).  If
    any diagonal entry of the Cholesky factor of ``cov`` is not strictly
    positive, the log-probability is ``-inf`` and both gradients are NaN.

    Parameters
    ----------
    cov: aet.matrix
        The covariance matrix.
    delta: aet.matrix
        Array of deviations from the mean.
    """
    cov = aet.matrix("cov")
    cov.tag.test_value = floatX(np.eye(3))
    delta = aet.matrix("delta")
    delta.tag.test_value = floatX(np.zeros((2, 3)))

    solve_lower = Solve(A_structure="lower_triangular")
    solve_upper = Solve(A_structure="upper_triangular")
    cholesky = Cholesky(lower=True, on_error="nan")

    def _factor_and_solve(cov_var, delta_var):
        # Shared by the forward pass and the gradient: factor the covariance,
        # test the diagonal for positivity, fall back to an all-ones matrix
        # when the test fails, and solve against the transposed deviations.
        factor = cholesky(cov_var)
        factor_diag = aet.nlinalg.diag(factor)
        is_ok = aet.all(factor_diag > 0)
        factor = aet.switch(is_ok, factor, aet.fill(factor, 1))
        solved = solve_lower(factor, delta_var.T).T
        return factor, factor_diag, is_ok, solved

    n_rows, n_cols = delta.shape
    n_rows, n_cols = f(n_rows), f(n_cols)
    _, diag, ok, delta_trans = _factor_and_solve(cov, delta)

    total = n_rows * n_cols * aet.log(f(2) * np.pi)
    total = total + f(2) * n_rows * aet.sum(aet.log(diag))
    total = total + (delta_trans ** f(2)).sum()
    total = f(-0.5) * total
    logp = aet.switch(ok, total, -np.inf)

    def dlogp(inputs, gradients):
        # Gradient override: returns the gradients w.r.t. ``cov`` and
        # ``delta``, each scaled by the incoming gradient of ``logp``.
        (g_logp,) = gradients
        cov_in, delta_in = inputs

        g_logp.tag.test_value = floatX(1.0)
        n_obs, dim = delta_in.shape

        chol_cov, _, grad_ok, solved = _factor_and_solve(cov_in, delta_in)
        chol_cov_t = chol_cov.T

        inner = n_obs * aet.eye(dim) - aet.dot(solved.T, solved)
        g_cov = solve_upper(chol_cov_t, inner)
        g_cov = solve_upper(chol_cov_t, g_cov.T)

        g_delta = solve_upper(chol_cov_t, solved.T).T

        g_cov = aet.switch(grad_ok, g_cov, -np.nan)
        g_delta = aet.switch(grad_ok, g_delta, -np.nan)

        return [-0.5 * g_cov * g_logp, -g_delta * g_logp]

    return OpFromGraph([cov, delta], [logp], grad_overrides=dlogp, inline=True)
def setup_method(self):
    """Record ``Solve`` as the op class under test, store a default instance, then run the parent setup."""
    op_type = Solve
    self.op_class = op_type
    self.op = op_type()
    super().setup_method()
def test__init__(self):
    """An unrecognized ``assume_a`` value must make ``Solve`` raise ``ValueError``."""
    expected_fragment = "is not a recognized matrix structure"

    with pytest.raises(ValueError) as excinfo:
        Solve(assume_a="test")

    message = str(excinfo.value)
    assert expected_fragment in message
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import warnings

import aesara.tensor as aet
import numpy as np

from aesara.tensor.slinalg import Solve, cholesky  # pylint: disable=unused-import
from aesara.tensor.var import TensorConstant
from scipy.cluster.vq import kmeans

solve_lower = Solve(A_structure="lower_triangular")
solve_upper = Solve(A_structure="upper_triangular")
solve = Solve(A_structure="general")


def infer_shape(X, n_points=None):
    """Return the number of points, inferring it from ``X`` when not given.

    Parameters
    ----------
    X
        Object exposing a ``shape`` attribute; only ``X.shape[0]`` is read,
        and only when ``n_points`` is ``None``.
    n_points
        Explicit number of points.  Returned unchanged when not ``None``.

    Returns
    -------
    The given ``n_points``, or ``int(X.shape[0])`` when ``n_points`` is
    ``None``.

    Raises
    ------
    TypeError
        If ``X.shape[0]`` cannot be converted to an ``int``.
    """
    if n_points is not None:
        return n_points
    try:
        # The builtin ``int`` replaces the ``np.int`` alias, which was
        # deprecated in NumPy 1.20 and removed in NumPy 1.24.
        return int(X.shape[0])
    except TypeError as err:
        raise TypeError("Cannot infer 'shape', provide as an argument") from err


def stabilize(K):
    """ adds small diagonal to a covariance matrix """