def test_roots_hermite():
    rootf = sc.roots_hermite
    evalf = sc.eval_hermite
    weightf = orth.hermite(5).weight_func

    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 5)
    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 25, atol=1e-13)
    verify_gauss_quad(rootf, evalf, weightf, -np.inf, np.inf, 100, atol=1e-12)

    # Golub-Welsch branch
    x, w = sc.roots_hermite(5, False)
    y, v, m = sc.roots_hermite(5, True)
    assert_allclose(x, y, 1e-14, 1e-14)
    assert_allclose(w, v, 1e-14, 1e-14)

    muI, muI_err = integrate.quad(weightf, -np.inf, np.inf)
    assert_allclose(m, muI, rtol=muI_err)

    # Asymptotic branch (switchover at n >= 150)
    x, w = sc.roots_hermite(200, False)
    y, v, m = sc.roots_hermite(200, True)
    assert_allclose(x, y, 1e-14, 1e-14)
    assert_allclose(w, v, 1e-14, 1e-14)
    assert_allclose(sum(v), m, 1e-14, 1e-14)

    assert_raises(ValueError, sc.roots_hermite, 0)
    assert_raises(ValueError, sc.roots_hermite, 3.3)
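# The test above relies on the defining property that an n-point Gauss-Hermite
# rule integrates polynomials up to degree 2n-1 exactly against exp(-x**2).
# `verify_gauss_quad` is not shown here; below is a minimal standalone sketch
# of the kind of check it presumably performs, using only
# scipy.special.roots_hermite and scipy.integrate.quad as the reference.
import numpy as np
from scipy import integrate
from scipy.special import roots_hermite

def check_hermite_exactness(n):
    x, w = roots_hermite(n)
    for deg in range(2 * n):  # all degrees 0 .. 2n-1
        quad_val = np.sum(w * x**deg)
        ref_val, _ = integrate.quad(lambda t: t**deg * np.exp(-t**2),
                                    -np.inf, np.inf)
        assert np.allclose(quad_val, ref_val, atol=1e-10)

check_hermite_exactness(5)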
from numpy import sqrt
from scipy.special import roots_hermite

def gauss_hermite_quadrature(n, mu=None, sigma=None):
    """
    Compute int f(x) exp(-x^2) dx = sum_{i=0}^n w[i] * f(x[i])
    using Gauss-Hermite quadrature.

    Parameters
    ----------
    n : int
        Number of quadrature points.
    mu : float, optional
        Mean of a target Gaussian; if given (together with `sigma`), the nodes
        are rescaled to y = sqrt(2) * sigma * x + mu so the rule can be used
        for expectations under N(mu, sigma^2). The default is None.
    sigma : float, optional
        Standard deviation of the target Gaussian. The default is None.

    Returns
    -------
    ndarray
        Quadrature nodes (rescaled if `mu` is given).
    w : ndarray
        Quadrature weights.
    """
    x, w = roots_hermite(n)
    if mu is not None:
        y = sqrt(2.) * sigma * x + mu
        return y, w
    else:
        return x, w
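# Example use of the mu/sigma rescaling above: with y = sqrt(2)*sigma*x + mu,
# E_{X ~ N(mu, sigma^2)}[f(X)] ≈ pi**(-1/2) * sum(w * f(y)), where the
# pi**(-1/2) factor comes from the same change of variables. Quick check with
# f(x) = x**2, whose exact expectation is mu**2 + sigma**2:
import numpy as np

mu, sigma = 1.5, 0.7
y, w = gauss_hermite_quadrature(10, mu=mu, sigma=sigma)
approx = np.pi ** (-0.5) * np.sum(w * y**2)
assert np.isclose(approx, mu**2 + sigma**2)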
def gauss_hermite(n):
    '''
    Gauss-Hermite quadrature:
    A rule of order 2*n-1 on the line with respect to the weight function
    w(x) = exp(-x**2).
    '''
    return special.roots_hermite(n)
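# For instance, even the 2-point rule integrates x**2 * exp(-x**2) exactly,
# since the polynomial part has degree 2 <= 2*2 - 1; the exact value of the
# integral is sqrt(pi)/2.
import numpy as np

x, w = gauss_hermite(2)
assert np.isclose(np.sum(w * x**2), np.sqrt(np.pi) / 2)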
def get_hfactor_expectation_coefs_points(factor, K, T, dtype='float64'):
    """
    Get the coefficients and evaluation points needed for computing the expectation of a
    scalar-valued function over the hybrid factor (for all K mixture components simultaneously).

    :param factor:
    :param T: number of quad points (only needed if factor contains a cont node); for now assume
        the same for all cnodes
    :return: (coefs, axes): coefs, axes are lists of matrices, of shapes
        [[K, V1], [K, V2], ..., [K, Vn]], where n is the number of nodes in the factor, and
        Vi = T if node i is cont, or the number of states if node i is discrete; to compute the
        expectation w.r.t. the kth component (fully factorized) distribution, define the tensor
        of coefficients C_k := \bigotimes_{i=1}^n coefs[i][k, :] and the tensor of evaluation
        points E_k := \bigotimes_{i=1}^n axes[i][k, :]; then the kth expectation is
        \langle vec(C_k), vec(f(E_k)) \rangle; the total expectation w.r.t. the mixture is
        obtained by taking a weighted combination (by w_k) of the K component expectations.
        The list of the kth rows of the axes mats gives the axes needed to construct the
        evaluation grid for computing the kth component-wise expectation.
    """
    coefs = []
    axes = []
    assert factor.domain_type in ('c', 'h'), \
        'Must input continuous/hybrid factor; use dfactor_bfe_obj directly for discrete factor ' \
        'for better performance'
    # compute GHQ once (same for all cnodes) if factor is cont/hybrid
    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = np.pi ** (-0.5)  # from change-of-var
    ghq_weights = ghq_coef * ghq_weights  # fold ghq_coef into the quadrature weights, so no need to worry about it later
    ghq_weights = tf.constant(ghq_weights, dtype=dtype)
    ghq_weights_KT = tf.tile(tf.reshape(ghq_weights, [1, -1]), [K, 1])  # K x T (repeat for K identical rows)

    for rv in factor.nb:
        if rv.domain_type[0] == 'd':  # discrete
            c = rv.belief_params_['pi']  # K x dstates[i] matrix (tf); will be put under stop_gradient later
            a = np.tile(np.reshape(rv.values, [1, -1]), [K, 1])  # K x dstates[i] (last dimension repeated)
            a = tf.constant(a, dtype=dtype)  # otherwise tf complains about multiplying int tensor with float tensor
        elif rv.domain_type[0] == 'c':  # cont, assuming Gaussian for now
            c = ghq_weights_KT
            mean_K1 = rv.belief_params_['mu_K1']
            var_K1 = rv.belief_params_['var_K1']
            a = (2 * var_K1) ** 0.5 * ghq_points + mean_K1  # K x T
            a = tf.stop_gradient(a)  # don't want to differentiate w.r.t. evaluation points
        else:
            raise NotImplementedError
        coefs.append(c)
        axes.append(a)

    return coefs, axes
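# Numpy sketch (hypothetical, outside tf) of the coefs/axes contract documented
# above, for one mixture component and a factor with one discrete node X and
# one Gaussian node Y ~ N(mu, var): the expectation of f under the factorized
# belief is <vec(C_k), vec(f(E_k))>, with C_k / E_k the outer products of the
# per-node coefficient / evaluation-point rows.
import numpy as np
from scipy.special import roots_hermite

pi_x = np.array([0.3, 0.7])      # discrete belief over states {0, 1}
x_vals = np.array([0.0, 1.0])
mu, var = 2.0, 0.5
t, wt = roots_hermite(8)
ghq_w = np.pi ** (-0.5) * wt                 # coefs row for the cont node
y_vals = (2 * var) ** 0.5 * t + mu           # axes row for the cont node

f = lambda x, y: x * y                       # E[f] = E[X] * E[Y] here
C = np.outer(pi_x, ghq_w)                    # tensor of coefficients C_k
F = f(x_vals[:, None], y_vals[None, :])      # f evaluated on the grid E_k
assert np.isclose(np.sum(C * F), 0.7 * mu)   # E[X] * E[Y] = 0.7 * 2.0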
def crvs_bfe_obj(rvs, T, w, Mu, Var, rvs_counts=None):
    """
    Get the contribution to the BFE from multiple cont (Gaussian) rvs.

    :param rvs: list of cont rvs; must "line up with" params Mu and Var, i.e., Mu[i] and Var[i]
        give the belief params for rvs[i]
    :param T:
    :param w:
    :param Mu:
    :param Var:
    :param rvs_counts: an iterable of non-negative ints, such that the bfe contribution from
        rvs[i] will be multiplied by rvs_counts[i]; by default the contribution from each rv is
        only counted once
    :return:
    """
    [N, K] = Mu.shape
    w_1K1 = w[None, :, None]
    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = np.pi ** (-0.5)  # from change-of-var
    ghq_weights *= ghq_coef
    QY = ghq_points * (2 * tf.reshape(Var, [N, K, 1])) ** 0.5 + tf.reshape(Mu, [N, K, 1])  # N x K x T; all eval points
    QY = tf.stop_gradient(QY)  # don't want to differentiate w.r.t. quad points
    log_belief = tf.log(eval_crvs_belief(QY, w, Mu, Var))  # N x K x T
    prod = tf.stop_gradient(w_1K1 * ghq_weights * log_belief)  # N x K x T; weighted component-wise Hadamard products

    num_nbrs = np.array([len(rv.nb) for rv in rvs])
    if rvs_counts is None:
        rvs_counts = np.ones(len(rvs), dtype='int')
    else:
        rvs_counts = np.array(rvs_counts).astype('int')
    expect_coefs = rvs_counts * (1 - num_nbrs)
    bfe = tf.reduce_sum(expect_coefs * tf.reduce_sum(prod, axis=[1, 2]))
    aux_obj = tf.reduce_sum(expect_coefs * tf.reduce_sum(prod * log_belief, axis=[1, 2]))

    return bfe, aux_obj
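# Sanity check (numpy, hypothetical K=1 case) for the quadrature pattern used
# above: for a single-component belief b = N(mu, var), the expected log-belief
# E_b[log b] equals -0.5*log(2*pi*var) - 0.5, and the GHQ estimate
# sum(ghq_weights * log b(QY)) should recover it.
import numpy as np
from scipy.special import roots_hermite

mu, var = -1.0, 2.5
t, wt = roots_hermite(20)
ghq_w = np.pi ** (-0.5) * wt
qy = (2 * var) ** 0.5 * t + mu
log_b = -0.5 * np.log(2 * np.pi * var) - 0.5 * (qy - mu) ** 2 / var
assert np.isclose(np.sum(ghq_w * log_b), -0.5 * np.log(2 * np.pi * var) - 0.5)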
def create_mapping_of_nodes_and_weights(number_of_nodes):
    from scipy.special import roots_hermite
    nodes, weights = roots_hermite(number_of_nodes + 1)
    return {'nodes': nodes, 'weights': weights}
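# Note the off-by-one convention above: requesting `number_of_nodes` yields a
# rule with number_of_nodes + 1 points.
mapping = create_mapping_of_nodes_and_weights(4)
assert len(mapping['nodes']) == len(mapping['weights']) == 5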
dim) * wi  # maximum search length. Default: diagonal of search domain
tau = 0.9  # contraction rate. Default: 0.9
num_ls = int((GH_pts - 1) * dim * 0.05)  # number of points used for LS. Default: 5% of function evaluations used for computing DGS gradient
# if GH_pts is even, num_ls = int(GH_pts*dim*0.05)
pw = 5 * wi  # initial radius. Default: 5 * width
eps = 1e-3  # tolerance of relative update for resetting radius. Default: 1e-3
res_step = 10  # minimum number of steps between radius resets. Default: 10

#----------------------------------------------------------
# GH values and weights
#----------------------------------------------------------
gh = roots_hermite(GH_pts)
gh_value = np.expand_dims(gh[0], axis=1)
gh_weight = gh[1]

# GH point matrix
gh_value_mat = np.zeros((GH_pts * dim, dim))
gh_value_vec = np.zeros(GH_pts * dim)
for i in range(dim):
    gh_value_mat[GH_pts * i:GH_pts * (i + 1), i] = gh[0]
    gh_value_vec[GH_pts * i:GH_pts * (i + 1)] = gh[0]

#########################################################
# MAIN LOOP
#########################################################
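# The machinery above feeds a DGS (directional Gaussian smoothing) gradient
# estimator. Minimal 1-D sketch of the idea (a reconstruction under stated
# assumptions, not this code's exact estimator): the derivative of the
# Gaussian-smoothed function F(x) = E_{u~N(0,1)}[f(x + r*u)] can be written
# via Gauss-Hermite quadrature as
#   F'(x) ≈ (1/r) * sqrt(2/pi) * sum_i w_i * v_i * f(x + sqrt(2)*r*v_i).
import numpy as np
from scipy.special import roots_hermite

def dgs_grad_1d(f, x, r, gh_pts=7):
    v, w = roots_hermite(gh_pts)
    return (1.0 / r) * np.sqrt(2.0 / np.pi) * np.sum(w * v * f(x + np.sqrt(2.0) * r * v))

# For f(x) = x**2, F(x) = x**2 + r**2, so F'(3) = 6 for any smoothing radius r.
assert np.isclose(dgs_grad_1d(lambda x: x**2, 3.0, r=0.5), 6.0)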
import numpy as np
from scipy.special import roots_hermite

def gauss_hermite(fun, n, args=()):
    """Gauss-Hermite quadrature of `fun` against the weight exp(-x**2)."""
    xi, wi = roots_hermite(n)
    return np.sum(fun(xi, *args) * wi)
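# Quick check against the closed form int cos(x) * exp(-x**2) dx = sqrt(pi) * exp(-1/4):
approx = gauss_hermite(np.cos, 20)
assert np.isclose(approx, np.sqrt(np.pi) * np.exp(-0.25))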
def get_quad_bfe(g, w, Mu, Sigs, T, node_lpot, edge_lpot):
    """
    Get the symbolic tensorflow objective for optimizing BFE with quadrature approximation for
    the integrals.

    :param g: graph; for now assuming all its continuous nodes are modeled by diag gaussian mixtures
    # :param nodes: length N list/set of node ids that are modeled by diag gaussian mixtures
    :param Mu: N x K tensor of diag gaussian mixture node means
    :param Sigs: N x K tensor of diag gaussian mixture node variances
    :param T: num quad points
    :return: bfe, aux_obj; aux_obj is the actual (minimization) objective that tensorflow does
        auto-diff w.r.t. (tf can't directly differentiate thru expectations in the bfe)
    """
    from scipy.special import roots_hermite
    [N, K] = Mu.shape
    assert g.Nc == N  # TODO: no longer assume all continuous nodes are gm; allow specifying a subset of nodes
    num_cedges = len(g.Ec)
    dtype = Mu.dtype

    bfe = 0
    aux_obj = 0
    w_col = tf.reshape(w, [K, 1])

    qx_np, qw_np = roots_hermite(T)
    qx = tf.constant(qx_np, dtype=dtype)
    qw = tf.constant(qw_np, dtype=dtype)
    qw_outer = tf.constant(np.outer(qw_np, qw_np))  # T x T
    integral_coef = np.pi ** (-0.5)

    QY = qx * (2 * tf.reshape(Sigs, [N, K, 1])) ** 0.5 + tf.reshape(Mu, [N, K, 1])  # N x K x T
    QY = tf.stop_gradient(QY)  # don't want to differentiate w.r.t. quadrature points

    # all nodes
    num_nbrs = np.array([len(g.adj[n]) for n in g.Vc])
    num_nbrs = num_nbrs.reshape([N, 1, 1])
    node_log_belief = tf.log(node_belief(QY, w, Mu, Sigs))  # N x K x T
    F = node_lpot('c', QY) - (1 - num_nbrs) * node_log_belief  # N x K x T
    grals = integral_coef * tf.reduce_sum(qw * F, 2)  # Nc x K
    bfe += tf.reduce_sum(grals @ w_col)
    grals = integral_coef * tf.reduce_sum(qw * tf.stop_gradient(F) * node_log_belief, 2)  # treating F as const
    aux_obj += tf.reduce_sum(grals @ w_col)

    # all edges
    cedge_i = np.array([g.Vc_idx[n] for n in g.Ec[:, 0]])  # 1 x num_cedges; from
    cedge_j = np.array([g.Vc_idx[n] for n in g.Ec[:, 1]])  # 1 x num_cedges; to
    QYi = tf.gather(QY, cedge_i)  # num_cedges x K x T
    QYYi = tf.zeros([num_cedges, K, T, T], dtype=Mu.dtype) \
           + tf.reshape(QYi, [num_cedges, K, T, 1])  # hack, since there's no tf.repeat
    QYj = tf.gather(QY, cedge_j)  # num_cedges x K x T
    QYYj = tf.zeros([num_cedges, K, T, T], dtype=dtype) \
           + tf.reshape(QYj, [num_cedges, K, 1, T])  # hack, since there's no tf.repeat
    cedge_log_belief = tf.log(
        edge_belief(QYYi, QYYj, w, tf.gather(Mu, cedge_i), tf.gather(Mu, cedge_j),
                    tf.gather(Sigs, cedge_i), tf.gather(Sigs, cedge_j)))
    F = edge_lpot('c', 'c', QYYi, QYYj) - cedge_log_belief
    inner_prods = tf.reduce_sum(qw_outer * F, axis=[2, 3])  # num_cedges x K
    bfe += integral_coef ** 2 * tf.reduce_sum(inner_prods @ w_col)
    inner_prods = tf.reduce_sum(qw_outer * tf.stop_gradient(F) * cedge_log_belief,
                                axis=[2, 3])  # treating F as const
    aux_obj += integral_coef ** 2 * tf.reduce_sum(inner_prods @ w_col)

    return bfe, aux_obj
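# The edge terms above use a tensor-product rule: for independent Gaussians
# Yi ~ N(mu_i, s_i) and Yj ~ N(mu_j, s_j),
#   E[f(Yi, Yj)] ≈ pi**(-1) * sum_{a,b} qw[a] * qw[b] * f(QY_i[a], QY_j[b]),
# which is exactly the qw_outer / integral_coef**2 pattern. Numpy sketch
# checking f(yi, yj) = yi * yj, whose exact expectation is mu_i * mu_j:
import numpy as np
from scipy.special import roots_hermite

mu_i, s_i, mu_j, s_j = 1.0, 0.4, -2.0, 1.5
qx, qw = roots_hermite(10)
yi = (2 * s_i) ** 0.5 * qx + mu_i
yj = (2 * s_j) ** 0.5 * qx + mu_j
F = np.outer(yi, yj)                      # f on the T x T grid
approx = np.pi ** (-1) * np.sum(np.outer(qw, qw) * F)
assert np.isclose(approx, mu_i * mu_j)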
def gauss_hermite(fun, n, args=None): """Gauss hermite quadrature""" xi, wi = roots_hermite(n) return np.sum(fun(xi, *args) * wi)
def hfactors_bfe_obj(factors, T, w, dtype='float64', neg_lpot_only=False):
    """
    Get the contribution to the BFE from multiple hybrid (or continuous) factors that have the
    same types of neighboring rvs.

    :param factors: length C list of factor objects that have the same nb_domain_type
    :param T:
    :param w:
    :param dtype: float type to use
    :param neg_lpot_only: if False (default), compute E_b[-log pot + log b] as in BFE; if True,
        only compute E_b[-log pot] (with no log belief in the expectant), to be used with neg
        ELBO (for NPVI)
    :return:
    """
    # group factors with the same types of log potentials together for efficient evaluation later
    factors_with_unique_log_potential_fun_types, unique_log_potential_fun_types = \
        utils.get_unique_subsets(factors, key=lambda f: type(f.log_potential_fun))
    factors = sum(factors_with_unique_log_potential_fun_types, [])  # join together into flat list
    K = np.prod(w.shape)
    C = len(factors)
    factor = factors[0]
    n = len(factor.nb)

    ghq_points, ghq_weights = roots_hermite(T)  # assuming Gaussian for now
    ghq_coef = np.pi ** (-0.5)  # from change-of-var
    ghq_weights = ghq_coef * ghq_weights  # fold ghq_coef into the quadrature weights, so no need to worry about it later
    ghq_weights = tf.constant(ghq_weights, dtype=dtype)
    ghq_weights_CKT = tf.tile(tf.reshape(ghq_weights, [1, 1, T]), [C, K, 1])  # C x K x T

    coefs = [None] * n  # will be [[C, K, V1], [C, K, V2], ..., [C, K, Vn]]
    axes = [None] * n  # will be [[C, K, V1], [C, K, V2], ..., [C, K, Vn]]
    comp_probs = []  # for evaluating beliefs along the way
    for i, domain_type in enumerate(factor.nb_domain_types):
        factors_ith_nb = [factor.nb[i] for factor in factors]  # the ith neighbor (rv in clique) across all factors
        if domain_type[0] == 'd':
            rv = factor.nb[i]
            c = tf.stack([rv.belief_params_['pi'] for rv in factors_ith_nb],
                         axis=0)  # C x K x Vi, where Vi is the number of dstates of factor.nb[i]
            coefs[i] = c  # the prob params are exactly the inner-prod coefficients in expectations
            a = np.tile(np.reshape(rv.values, [1, 1, -1]),
                        [C, K, 1])  # C x K x dstates[i] (last dimension repeated)
            a = tf.constant(a, dtype=dtype)  # otherwise tf complains about multiplying int tensor with float tensor
            axes[i] = a

            # eval_hfactors_belief
            comp_prob = tf.transpose(c, [1, 0, 2])  # K x C x Vi
            comp_prob = comp_prob[:, :, None, :]  # K x C x 1 x Vi
            comp_prob = tf.tile(comp_prob, [1, 1, K, 1])  # K x C x M(=K) x Vi; same for all M(=K) axes
        elif domain_type[0] == 'c':
            Mu_CK = tf.stack([rv.belief_params_['mu'] for rv in factors_ith_nb], axis=0)  # C x K
            Var_CK = tf.stack([rv.belief_params_['var'] for rv in factors_ith_nb], axis=0)  # C x K
            coefs[i] = ghq_weights_CKT
            a = (2 * Var_CK[:, :, None]) ** 0.5 * ghq_points + Mu_CK[:, :, None]  # C x K x T
            a = tf.stop_gradient(a)  # don't want to differentiate w.r.t. evaluation points
            axes[i] = a

            # eval_hfactors_belief
            Mu_KC11 = tf.transpose(Mu_CK)[:, :, None, None]  # K x C x 1 x 1
            Var_inv_KC11 = tf.stack([rv.belief_params_['var_inv_K1'] for rv in factors_ith_nb],
                                    axis=1)[:, :, None]
            # eval pdf of axes[i] under all K scalar comps of ith nodes in all the cliques;
            # result is K x C x M(=K) x Vi
            comp_prob = (2 * np.pi) ** (-0.5) * tf.sqrt(Var_inv_KC11) * \
                        tf.exp(-0.5 * (axes[i] - Mu_KC11) ** 2 * Var_inv_KC11)
        else:
            raise NotImplementedError
        comp_probs.append(comp_prob)

    # eval_hfactors_belief: multiply all dimensions together, then weigh by w
    einsum_eq = utils.outer_prod_einsum_equation(len(factor.nb), common_first_ndims=3)
    joint_comp_probs = tf.einsum(einsum_eq, *comp_probs)  # K x C x M x V1 x V2 x ... Vn
    w_broadcast = tf.reshape(w, [K] + [1] * (len(factor.nb) + 2))
    belief = tf.reduce_sum(w_broadcast * joint_comp_probs, axis=0)  # C x M x V1 x V2 x ... Vn
    # above replaces the call belief = eval_hfactors_belief(factors, axes, w)  # C x K x V1 x V2 x ... Vn

    einsum_eq = utils.outer_prod_einsum_equation(n, common_first_ndims=2)
    coefs = tf.einsum(einsum_eq, *coefs)  # C x K x V1 x V2 x ... Vn; C x K grids of Hadamard products
    lpot = group_eval_log_potential_funs(factors_with_unique_log_potential_fun_types,
                                         unique_log_potential_fun_types, axes)  # C x K x V1 x V2 x ... Vn
    log_belief = tf.log(belief)
    if neg_lpot_only:
        F = -lpot
    else:
        F = -lpot + log_belief
    w_broadcast = tf.reshape(w, [-1] + [1] * n)  # K x 1 x 1 ... x 1
    prod = tf.stop_gradient(w_broadcast * coefs * F)  # weighted component-wise Hadamard products for C x K expectations
    factors_bfes = tf.reduce_sum(prod, axis=list(range(1, n + 2)))  # reduce the last (n+1) dimensions
    factors_aux_objs = tf.reduce_sum(prod * log_belief, axis=list(range(1, n + 2)))  # reduce the last (n+1) dimensions

    sharing_counts = np.array([factor.sharing_count for factor in factors], dtype='float')
    bfe = tf.reduce_sum(sharing_counts * factors_bfes)
    aux_obj = tf.reduce_sum(sharing_counts * factors_aux_objs)

    return bfe, aux_obj
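# `utils.outer_prod_einsum_equation` is not shown here; from its use above it
# presumably builds an einsum string whose operands share leading batch dims
# and whose trailing dims are outer-multiplied. A plausible equation for n=2
# factors with common_first_ndims=2 (the C, K dims) would be:
import numpy as np

einsum_eq = 'abi,abj->abij'  # assumed form, matching the C x K x V1 x V2 output
c1 = np.random.rand(3, 4, 2)  # C=3, K=4, V1=2
c2 = np.random.rand(3, 4, 5)  # C=3, K=4, V2=5
out = np.einsum(einsum_eq, c1, c2)
assert out.shape == (3, 4, 2, 5)
assert np.isclose(out[1, 2, 0, 3], c1[1, 2, 0] * c2[1, 2, 3])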
def hermiteRoots(self, n):
    return special.roots_hermite(n)