def _cross_validate_pce_degree(pce, train_samples, train_vals, min_degree=1,
                               max_degree=3, hcross_strength=1, cv=10,
                               solver_type='lasso_lars', verbosity=0):
    """
    Private helper: sweep hyperbolic-cross bases over
    ``range(min_degree, max_degree+1)`` and keep the degree with the
    highest cross validation score.

    Returns the modified ``pce``, the best cv score and the best degree.
    """
    assert train_vals.shape[1] == 1
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = -np.finfo(np.double).max  # scores are maximized here
    best_degree = min_degree
    prev_num_terms = 0
    if verbosity > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree', 'num_terms', 'cv score',))
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(
            pce.num_vars(), degree, hcross_strength)
        pce.set_indices(indices)
        # stop growing once the basis overshoots the 100000-term budget by
        # more than the previous basis undershot it
        if ((pce.num_terms() > 100000) and
                (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break
        basis_matrix = pce.basis_matrix(train_samples)
        coef, cv_score = fit_linear_model(
            basis_matrix, train_vals, solver_type, cv=cv)
        pce.set_coefficients(coef)
        if verbosity > 0:
            print("{:<8} {:<10} {:<18} ".format(
                degree, pce.num_terms(), cv_score))
        if (cv_score > best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
        # BUG FIX: break when the score has NOT improved for more than one
        # degree past the best degree. The original ``>=`` comparison,
        # evaluated after the update above, could only be true on an exact
        # tie, so the early exit effectively never fired.
        if ((cv_score <= best_cv_score) and (degree - best_degree > 1)):
            break
        prev_num_terms = pce.num_terms()

    pce.set_indices(compute_hyperbolic_indices(
        pce.num_vars(), best_degree, hcross_strength))
    pce.set_coefficients(best_coef)
    if verbosity > 0:
        print('best degree:', best_degree)
    return pce, best_cv_score, best_degree
def test_approximate_fixed_pce(self):
    # Check that ``cross_validate_approximation`` with a fixed basis and a
    # least-squares solver reproduces, per QoI, the cross validation score
    # of ``LinearLeastSquaresCV`` fit directly on the same basis matrix
    # (deterministic folds are used on both sides so scores must match).
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # damp coefficients by total degree so the target decays smoothly
    degrees = poly.indices.sum(axis=0)
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)

    # cross validate a fixed degree-1 basis with plain least squares
    indices = pya.compute_hyperbolic_indices(num_vars, 1, 1)
    nfolds = 10
    method = "polynomial_chaos"
    options = {
        "basis_type": "fixed",
        "variable": variable,
        "options": {
            "linear_solver_options": {},
            "indices": indices,
            "solver_type": "lstsq"
        }
    }
    approx_list, residues_list, cv_score = \
        cross_validate_approximation(
            train_samples, train_vals, options, nfolds, method,
            random_folds=False)

    # reference: fit each QoI separately with the same deterministic folds
    solver = LinearLeastSquaresCV(cv=nfolds, random_folds=False)
    poly.set_indices(indices)
    basis_matrix = poly.basis_matrix(train_samples)
    solver.fit(basis_matrix, train_vals[:, 0:1])
    assert np.allclose(solver.cv_score_, cv_score[0])

    solver.fit(basis_matrix, train_vals[:, 1:2])
    assert np.allclose(solver.cv_score_, cv_score[1])
def test_adaptive_approximate_increment_degree(self):
    """The degree-incrementing adaptive PCE must recover the coefficients
    of a polynomial target once the target's degree is reached."""
    nvars = 2
    marginals = [stats.uniform(-1, 2)] * nvars
    variable = pya.IndependentMultivariateRandomVariable(marginals)
    transform = pya.AffineRandomVariableTransformation(variable)
    target = pya.PolynomialChaosExpansion()
    opts = pya.define_poly_options_from_variable_transformation(transform)
    target.configure(opts)
    degree = 3
    target.set_indices(pya.compute_hyperbolic_indices(nvars, degree))
    target.set_coefficients(
        np.random.normal(0, 1, (target.indices.shape[1], 1)))

    # allow the adaptive algorithm to go two degrees past the target degree
    result = adaptive_approximate_polynomial_chaos_increment_degree(
        target, variable, degree + 2, max_nsamples=31, cond_tol=1e4,
        sample_growth_factor=2, verbose=0, oversampling_ratio=None,
        solver_type='lstsq', callback=None)
    print('Ntrain samples', result.train_samples.shape[1])
    # the leading coefficients of the approximation match the target
    assert np.allclose(
        result.approx.coefficients[:target.coefficients.shape[0]],
        target.coefficients)
def help_cross_validate_pce_degree(self, solver_type, solver_options):
    # Shared driver for the solver-specific tests: build a degree-3 PCE
    # target with known coefficients, then check both ``approximate`` and
    # ``cross_validate_pce_degree`` recover it with the given solver.
    print(solver_type, solver_options)
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 3
    poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
    # factor of 2 does not pass test but 2.2 does
    num_samples = int(poly.num_terms() * 2.2)
    coef = np.random.normal(0, 1, (poly.indices.shape[1], 2))
    # for first qoi make degree 2 the best degree (zero out every
    # coefficient beyond the degree-2 total-degree block)
    coef[pya.nchoosek(num_vars + 2, 2):, 0] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly

    poly = approximate(
        train_samples, train_vals, "polynomial_chaos",
        {
            "basis_type": "hyperbolic_cross",
            "variable": variable,
            "options": {
                "verbose": 3,
                "solver_type": solver_type,
                "min_degree": 1,
                "max_degree": degree + 1,
                "linear_solver_options": solver_options
            }
        }).approx

    num_validation_samples = 10
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples))

    poly = copy.deepcopy(true_poly)
    approx_res = cross_validate_pce_degree(
        poly, train_samples, train_vals, 1, degree + 1,
        solver_type=solver_type, linear_solver_options=solver_options)
    # degree 2 should be selected for the first qoi (its degree-3
    # coefficients were zeroed above) and degree 3 for the second
    assert np.allclose(approx_res.degrees, [2, 3])
def test_pce_basis_expansion(self):
    # Check the "expanding_basis" strategy of ``approximate`` recovers a
    # sparse two-QoI PCE target from twice as many samples as basis terms.
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # damp coefficients by total degree so the target decays smoothly
    degrees = poly.indices.sum(axis=0)
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly

    poly = approximate(
        train_samples, train_vals, "polynomial_chaos",
        {
            "basis_type": "expanding_basis",
            "variable": variable,
            "options": {
                "max_num_expansion_steps_iter": 1,
                "verbose": 3,
                "max_num_terms": 1000,
                "max_num_step_increases": 2,
                "max_num_init_terms": 33
            }
        }).approx

    num_validation_samples = 100
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    # NOTE(review): the freshly drawn validation samples are immediately
    # overwritten with the training samples, so only the training fit is
    # actually checked -- confirm this is intended.
    validation_samples = train_samples
    error = np.linalg.norm(
        poly(validation_samples) - true_poly(validation_samples)) / np.sqrt(
            num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples),
                       atol=1e-8), error
def __init__(self, mesh_dof=100, num_terms=35):
    """Build the spatial mesh on [-1, 1] and a univariate PCE basis with
    ``num_terms`` terms."""
    self.num_terms = num_terms
    self.mesh = np.linspace(-1., 1., mesh_dof)

    marginals = [uniform(-1, 2)]
    transform = pya.AffineRandomVariableTransformation(marginals)
    self.poly = pya.PolynomialChaosExpansion()
    opts = pya.define_poly_options_from_variable_transformation(transform)
    self.poly.configure(opts)
    # in 1D a hyperbolic-cross of degree num_terms-1 has exactly
    # num_terms indices
    self.poly.set_indices(
        pya.compute_hyperbolic_indices(1, self.num_terms - 1))
def test_pce_sensitivities_of_ishigami_function(self):
    # Fit a degree-18 total-degree PCE to the Ishigami function by least
    # squares and compare its moments and Sobol sensitivity indices
    # against the known analytical values.
    nsamples = 1500
    nvars, degree = 3, 18
    univariate_variables = [uniform(-np.pi, 2 * np.pi)] * nvars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    indices = pya.compute_hyperbolic_indices(nvars, degree, 1.0)
    poly.set_indices(indices)
    #print('No. PCE Terms',indices.shape[1])

    samples = pya.generate_independent_random_samples(
        var_trans.variable, nsamples)
    values = ishigami_function(samples)

    # least-squares fit of the PCE coefficients
    basis_matrix = poly.basis_matrix(samples)
    coef = np.linalg.lstsq(basis_matrix, values, rcond=None)[0]
    poly.set_coefficients(coef)

    # hold-out error, computed for diagnostics only
    nvalidation_samples = 1000
    validation_samples = pya.generate_independent_random_samples(
        var_trans.variable, nvalidation_samples)
    validation_values = ishigami_function(validation_samples)
    poly_validation_vals = poly(validation_samples)
    abs_error = np.linalg.norm(
        poly_validation_vals - validation_values) / np.sqrt(
            nvalidation_samples)
    #print('Abs. Error',abs_error)

    pce_main_effects, pce_total_effects =\
        pya.get_main_and_total_effect_indices_from_pce(
            poly.get_coefficients(), poly.get_indices())

    mean, variance, main_effects, total_effects, sobol_indices, \
        sobol_interaction_indices = get_ishigami_funciton_statistics()
    assert np.allclose(poly.mean(), mean)
    assert np.allclose(poly.variance(), variance)
    assert np.allclose(pce_main_effects, main_effects)
    assert np.allclose(pce_total_effects, total_effects)

    interaction_terms, pce_sobol_indices = get_sobol_indices(
        poly.get_coefficients(), poly.get_indices(), max_order=3)
    assert np.allclose(pce_sobol_indices, sobol_indices)
def test_pce_basis_expansion(self):
    # Single-QoI variant: the expanding-basis strategy should reproduce a
    # sparse PCE target whose coefficients decay with total degree.
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # damp coefficients by total degree so the target decays smoothly
    degrees = poly.indices.sum(axis=0)
    poly.set_coefficients((np.random.normal(0, 1, poly.indices.shape[1]) /
                           (degrees + 1)**2)[:, np.newaxis])
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly

    # NOTE(review): unlike the other expanding-basis test, the result of
    # ``approximate`` is used directly (no ``.approx``) -- confirm this
    # older return convention is what the surrounding suite expects.
    poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
        'basis_type': 'expanding_basis',
        'variable': variable
    })

    num_validation_samples = 100
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    # NOTE(review): the validation samples are immediately overwritten
    # with the training samples, so only the training fit is checked --
    # confirm this is intended.
    validation_samples = train_samples
    error = np.linalg.norm(
        poly(validation_samples) - true_poly(validation_samples)) / np.sqrt(
            num_validation_samples)
    assert np.allclose(
        poly(validation_samples), true_poly(validation_samples),
        atol=1e-8),\
        error
def test_cross_validate_pce_degree(self):
    # ``cross_validate_pce_degree`` must identify the true degree of a
    # randomly generated degree-3 polynomial target.
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 3
    poly.set_indices(pya.compute_hyperbolic_indices(num_vars, degree, 1.0))
    num_samples = poly.num_terms() * 2
    poly.set_coefficients(
        np.random.normal(0, 1, (poly.indices.shape[1], 1)))
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    true_poly = poly

    # default options select the degree by cross validation
    poly = approximate(train_samples, train_vals, 'polynomial_chaos', {
        'basis_type': 'hyperbolic_cross',
        'variable': variable
    })

    num_validation_samples = 10
    validation_samples = pya.generate_independent_random_samples(
        variable, num_validation_samples)
    assert np.allclose(poly(validation_samples),
                       true_poly(validation_samples))

    # the degree search is allowed to overshoot by two degrees but must
    # still return the target degree
    poly = copy.deepcopy(true_poly)
    poly, best_degree = cross_validate_pce_degree(
        poly, train_samples, train_vals, 1, degree + 2)
    assert best_degree == degree
def test_cross_validate_approximation_after_regularization_selection(self):
    """
    This test is useful as it shows how to use cross_validate_approximation
    to produce a list of approximations on each cross validation fold
    once regularization parameters have been chosen.
    These can be used to show variance in predictions of values,
    sensitivity indices, etc.

    Ideally this could be avoided if sklearn stored the coefficients
    and alphas for each fold and then we can just find the coefficients
    that correspond to the first time the path drops below the best_alpha
    """
    num_vars = 2
    univariate_variables = [stats.uniform(-1, 2)] * num_vars
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree, hcross_strength = 7, 0.4
    poly.set_indices(
        pya.compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    num_samples = poly.num_terms() * 2
    # damp coefficients by total degree so the target decays smoothly
    degrees = poly.indices.sum(axis=0)
    coef = np.random.normal(
        0, 1, (poly.indices.shape[1], 2)) / (degrees[:, np.newaxis] + 1)**2
    # set some coefficients to zero to make sure that different qoi
    # are treated correctly.
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 0] = 0
    II = np.random.permutation(coef.shape[0])[:coef.shape[0] // 2]
    coef[II, 1] = 0
    poly.set_coefficients(coef)
    train_samples = pya.generate_independent_random_samples(
        variable, num_samples)
    train_vals = poly(train_samples)
    # true_poly = poly

    # first select regularization parameters with the expanding basis
    result = approximate(train_samples, train_vals, "polynomial_chaos", {
        "basis_type": "expanding_basis",
        "variable": variable
    })

    # Even with the same folds, iterative methods such as Lars, LarsLasso
    # and OMP will not have cv_score from approximate and cross validate
    # approximation exactly the same because iterative methods interpolate
    # residuals to compute cross validation scores
    nfolds = 10
    # reuse the selected regularization parameter of each qoi
    linear_solver_options = [{
        "alpha": result.reg_params[0]
    }, {
        "alpha": result.reg_params[1]
    }]
    # restrict each qoi to its active (nonzero-coefficient) indices
    indices = [
        result.approx.indices[:, np.where(np.absolute(c) > 0)[0]]
        for c in result.approx.coefficients.T
    ]
    options = {
        "basis_type": "fixed",
        "variable": variable,
        "options": {
            "linear_solver_options": linear_solver_options,
            "indices": indices
        }
    }
    approx_list, residues_list, cv_score = \
        cross_validate_approximation(
            train_samples, train_vals, options, nfolds,
            "polynomial_chaos", random_folds="sklearn")

    assert (np.all(cv_score < 6e-14) and np.all(result.scores < 4e-13))
def expanding_basis_omp_pce(pce, train_samples, train_vals, hcross_strength=1,
                            verbosity=1, max_num_terms=None,
                            solver_type='lasso_lars', cv=10,
                            restriction_tol=np.finfo(float).eps * 2):
    r"""
    Iteratively expand and restrict the polynomial basis and use
    cross validation to find the best basis [JESJCP2015]_

    Parameters
    ----------
    train_samples : np.ndarray (nvars,nsamples)
        The inputs of the function used to train the approximation

    train_vals : np.ndarray (nsamples,1)
        The values of the function at ``train_samples``. Only a single
        quantity of interest is supported.

    hcross_strength : float
       The strength of the hyperbolic cross index set. hcross_strength
       must be in (0,1]. A value of 1 produces total degree polynomials

    max_num_terms : integer
        The maximum number of basis terms allowed. Defaults to ten times
        the number of training values.

    cv : integer
        The number of cross validation folds used to compute the cross
        validation error

    solver_type : string
        The type of regression used to train the polynomial

        - 'lasso_lars'
        - 'lars'
        - 'lasso'
        - 'omp'

    verbosity : integer
        Controls the amount of information printed to screen

    restriction_tol : float
        The tolerance used to prune inactive indices

    Returns
    -------
    pce : :class:`pyapprox.multivariate_polynomials.PolynomialChaosExpansion`
        The PCE approximation

    References
    ----------
    .. [JESJCP2015] `J.D. Jakeman, M.S. Eldred, and K. Sargsyan. Enhancing
       l1-minimization estimates of polynomial chaos expansions using basis
       selection. Journal of Computational Physics, 289(0):18 - 34, 2015
       <https://doi.org/10.1016/j.jcp.2015.02.025>`_
    """
    assert train_vals.shape[1] == 1
    num_vars = pce.num_vars()
    if max_num_terms is None:
        # BUG FIX: scale the term budget with the number of training
        # samples (rows of train_vals). The original used
        # ``train_vals.shape[1]``, which the assert above pins to 1 and
        # therefore capped the basis at 10 terms regardless of data size.
        max_num_terms = 10 * train_vals.shape[0]

    # choose the initial hyperbolic-cross degree whose term count is
    # closest to the budget
    degree = 2
    prev_num_terms = 0
    while True:
        indices = compute_hyperbolic_indices(num_vars, degree,
                                             hcross_strength)
        num_terms = indices.shape[1]
        if (num_terms > max_num_terms):
            break
        degree += 1
        prev_num_terms = num_terms

    if (abs(num_terms - max_num_terms) >
            abs(prev_num_terms - max_num_terms)):
        degree -= 1
    pce.set_indices(
        compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    if verbosity > 0:
        msg = f'Initializing basis with hyperbolic cross of degree {degree} and '
        msg += f' strength {hcross_strength} with {pce.num_terms()} terms'
        print(msg)

    # fit on the initial basis to seed the search
    basis_matrix = pce.basis_matrix(train_samples)
    best_coef, best_cv_score = fit_linear_model(
        basis_matrix, train_vals, solver_type, cv=cv)
    pce.set_coefficients(best_coef)
    best_indices = pce.get_indices()
    if verbosity > 0:
        print("{:<10} {:<10} {:<18}".format(
            'nterms', 'nnz terms', 'cv score'))
        print("{:<10} {:<10} {:<18}".format(
            pce.num_terms(), np.count_nonzero(pce.coefficients),
            best_cv_score))

    best_cv_score_iter = best_cv_score
    best_num_expansion_steps = 3
    it = 0
    best_it = 0
    while True:
        max_num_expansion_steps = 1
        best_num_expansion_steps_iter = best_num_expansion_steps
        while True:
            # -------------- #
            #  Expand basis  #
            # -------------- #
            num_expansion_steps = 0
            # NOTE(review): the restricted index set is computed but never
            # applied to the pce; confirm whether
            # ``pce.set_indices(indices)`` was intended here.
            indices = restrict_basis(
                pce.indices, pce.coefficients, restriction_tol)
            while ((num_expansion_steps < max_num_expansion_steps) and
                   (num_expansion_steps < best_num_expansion_steps)):
                new_indices = expand_basis(pce.indices)
                pce.set_indices(np.hstack([pce.indices, new_indices]))
                num_terms = pce.num_terms()
                num_expansion_steps += 1

            # ----------------- #
            #  Compute solution #
            # ----------------- #
            basis_matrix = pce.basis_matrix(train_samples)
            coef, cv_score = fit_linear_model(
                basis_matrix, train_vals, solver_type, cv=cv)
            pce.set_coefficients(coef)
            if verbosity > 0:
                print("{:<10} {:<10} {:<18}".format(
                    pce.num_terms(), np.count_nonzero(pce.coefficients),
                    cv_score))
            if (cv_score > best_cv_score_iter):
                best_cv_score_iter = cv_score
                best_indices_iter = pce.indices.copy()
                best_coef_iter = pce.coefficients.copy()
                best_num_expansion_steps_iter = num_expansion_steps
            if (num_terms >= max_num_terms):
                break
            if (max_num_expansion_steps >= 3):
                break
            max_num_expansion_steps += 1

        if (best_cv_score_iter > best_cv_score):
            best_cv_score = best_cv_score_iter
            best_coef = best_coef_iter.copy()
            best_indices = best_indices_iter.copy()
            best_num_expansion_steps = best_num_expansion_steps_iter
            best_it = it
        elif (it - best_it >= 2):
            # two outer iterations without improvement: stop the search
            break
        it += 1

    # keep only the indices with nonzero coefficients in the best basis
    nindices = best_indices.shape[1]
    nonzero_idx = np.nonzero(best_coef[:, 0])[0]
    pce.set_indices(best_indices[:, nonzero_idx])
    pce.set_coefficients(best_coef[nonzero_idx])
    if verbosity > 0:
        msg = f'Final basis has {pce.num_terms()} terms selected from {nindices}'
        msg += f' using {train_samples.shape[1]} samples'
        print(msg)
    return pce, best_cv_score
def cross_validate_pce_degree(pce, train_samples, train_vals, min_degree=1,
                              max_degree=3, hcross_strength=1, cv=10,
                              solver_type='lasso_lars', verbosity=0):
    r"""
    Use cross validation to find the polynomial degree which best fits
    the data.
    A polynomial is constructed for each degree and the degree with the
    highest cross validation score is returned.

    Parameters
    ----------
    train_samples : np.ndarray (nvars,nsamples)
        The inputs of the function used to train the approximation

    train_vals : np.ndarray (nsamples,nqoi)
        The values of the function at ``train_samples``

    min_degree : integer
        The minimum degree to consider

    max_degree : integer
        The maximum degree to consider.
        All degrees in ``range(min_degree,max_degree+1)`` are considered

    hcross_strength : float
       The strength of the hyperbolic cross index set. hcross_strength
       must be in (0,1]. A value of 1 produces total degree polynomials

    cv : integer
        The number of cross validation folds used to compute the cross
        validation error

    solver_type : string
        The type of regression used to train the polynomial

        - 'lasso_lars'
        - 'lars'
        - 'lasso'
        - 'omp'

    verbosity : integer
        Controls the amount of information printed to screen

    Returns
    -------
    pce : :class:`pyapprox.multivariate_polynomials.PolynomialChaosExpansion`
        The PCE approximation

    best_degree : integer
        The degree with the highest cross validation score
    """
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = -np.finfo(np.double).max  # scores are maximized here
    best_degree = min_degree
    prev_num_terms = 0
    if verbosity > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree', 'num_terms', 'cv score',))
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(
            pce.num_vars(), degree, hcross_strength)
        pce.set_indices(indices)
        # stop growing once the basis overshoots the 100000-term budget by
        # more than the previous basis undershot it
        if ((pce.num_terms() > 100000) and
                (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break
        basis_matrix = pce.basis_matrix(train_samples)
        coef, cv_score = fit_linear_model(
            basis_matrix, train_vals, solver_type, cv=cv)
        pce.set_coefficients(coef)
        if verbosity > 0:
            print("{:<8} {:<10} {:<18} ".format(
                degree, pce.num_terms(), cv_score))
        if (cv_score > best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
        # BUG FIX: break when the score has NOT improved for more than one
        # degree past the best degree. The original ``>=`` comparison,
        # evaluated after the update above, could only be true on an exact
        # tie, so the early exit effectively never fired.
        if ((cv_score <= best_cv_score) and (degree - best_degree > 1)):
            break
        prev_num_terms = pce.num_terms()

    pce.set_indices(compute_hyperbolic_indices(
        pce.num_vars(), best_degree, hcross_strength))
    pce.set_coefficients(best_coef)
    if verbosity > 0:
        print('best degree:', best_degree)
    return pce, best_degree
def _expanding_basis_omp_pce(pce, train_samples, train_vals, hcross_strength=1,
                             verbosity=1, max_num_terms=None,
                             solver_type='lasso_lars', cv=10,
                             restriction_tol=np.finfo(float).eps * 2):
    """
    Private driver that iteratively expands and restricts the PCE basis,
    keeping the index set with the best cross validation score.

    Returns the modified ``pce`` and the best cv score found.
    """
    assert train_vals.shape[1] == 1
    num_vars = pce.num_vars()
    if max_num_terms is None:
        # BUG FIX: scale the term budget with the number of training
        # samples (rows of train_vals). The original used
        # ``train_vals.shape[1]``, which the assert above pins to 1 and
        # therefore capped the basis at 10 terms regardless of data size.
        max_num_terms = 10 * train_vals.shape[0]

    # choose the initial hyperbolic-cross degree whose term count is
    # closest to the budget
    degree = 2
    prev_num_terms = 0
    while True:
        indices = compute_hyperbolic_indices(num_vars, degree,
                                             hcross_strength)
        num_terms = indices.shape[1]
        if (num_terms > max_num_terms):
            break
        degree += 1
        prev_num_terms = num_terms
    if (abs(num_terms - max_num_terms) >
            abs(prev_num_terms - max_num_terms)):
        degree -= 1
    pce.set_indices(
        compute_hyperbolic_indices(num_vars, degree, hcross_strength))
    if verbosity > 0:
        msg = f'Initializing basis with hyperbolic cross of degree {degree} and '
        msg += f' strength {hcross_strength} with {pce.num_terms()} terms'
        print(msg)

    # fit on the initial basis to seed the search
    basis_matrix = pce.basis_matrix(train_samples)
    best_coef, best_cv_score = fit_linear_model(
        basis_matrix, train_vals, solver_type, cv=cv)
    pce.set_coefficients(best_coef)
    best_indices = pce.get_indices()
    if verbosity > 0:
        print("{:<10} {:<10} {:<18}".format(
            'nterms', 'nnz terms', 'cv score'))
        print("{:<10} {:<10} {:<18}".format(
            pce.num_terms(), np.count_nonzero(pce.coefficients),
            best_cv_score))

    best_cv_score_iter = best_cv_score
    best_num_expansion_steps = 3
    it = 0
    best_it = 0
    while True:
        max_num_expansion_steps = 1
        best_num_expansion_steps_iter = best_num_expansion_steps
        while True:
            # -------------- #
            #  Expand basis  #
            # -------------- #
            num_expansion_steps = 0
            # NOTE(review): the restricted index set is computed but never
            # applied to the pce; confirm whether
            # ``pce.set_indices(indices)`` was intended here.
            indices = restrict_basis(
                pce.indices, pce.coefficients, restriction_tol)
            while ((num_expansion_steps < max_num_expansion_steps) and
                   (num_expansion_steps < best_num_expansion_steps)):
                new_indices = expand_basis(pce.indices)
                pce.set_indices(np.hstack([pce.indices, new_indices]))
                num_terms = pce.num_terms()
                num_expansion_steps += 1

            # ----------------- #
            #  Compute solution #
            # ----------------- #
            basis_matrix = pce.basis_matrix(train_samples)
            coef, cv_score = fit_linear_model(
                basis_matrix, train_vals, solver_type, cv=cv)
            pce.set_coefficients(coef)
            if verbosity > 0:
                print("{:<10} {:<10} {:<18}".format(
                    pce.num_terms(), np.count_nonzero(pce.coefficients),
                    cv_score))
            if (cv_score > best_cv_score_iter):
                best_cv_score_iter = cv_score
                best_indices_iter = pce.indices.copy()
                best_coef_iter = pce.coefficients.copy()
                best_num_expansion_steps_iter = num_expansion_steps
            if (num_terms >= max_num_terms):
                break
            if (max_num_expansion_steps >= 3):
                break
            max_num_expansion_steps += 1

        if (best_cv_score_iter > best_cv_score):
            best_cv_score = best_cv_score_iter
            best_coef = best_coef_iter.copy()
            best_indices = best_indices_iter.copy()
            best_num_expansion_steps = best_num_expansion_steps_iter
            best_it = it
        elif (it - best_it >= 2):
            # two outer iterations without improvement: stop the search
            break
        it += 1

    # keep only the indices with nonzero coefficients in the best basis
    nindices = best_indices.shape[1]
    nonzero_idx = np.nonzero(best_coef[:, 0])[0]
    pce.set_indices(best_indices[:, nonzero_idx])
    pce.set_coefficients(best_coef[nonzero_idx])
    if verbosity > 0:
        msg = f'Final basis has {pce.num_terms()} terms selected from {nindices}'
        msg += f' using {train_samples.shape[1]} samples'
        print(msg)
    return pce, best_cv_score
def test_marginalize_polynomial_chaos_expansions(self):
    # Marginalizing out all-but-one (then all-but-two) variables of a PCE
    # with unit coefficients must agree with an independently constructed
    # lower-dimensional PCE, up to subtracting the mean.
    univariate_variables = [uniform(-1, 2), norm(0, 1), uniform(-1, 2)]
    variable = pya.IndependentMultivariateRandomVariable(
        univariate_variables)
    var_trans = pya.AffineRandomVariableTransformation(variable)
    num_vars = len(univariate_variables)
    poly = pya.PolynomialChaosExpansion()
    poly_opts = pya.define_poly_options_from_variable_transformation(
        var_trans)
    poly.configure(poly_opts)
    degree = 2
    indices = pya.compute_hyperbolic_indices(num_vars, degree, 1)
    poly.set_indices(indices)
    poly.set_coefficients(np.ones((indices.shape[1], 1)))

    pce_main_effects, pce_total_effects =\
        pya.get_main_and_total_effect_indices_from_pce(
            poly.get_coefficients(), poly.get_indices())
    print(poly.num_terms())
    for ii in range(num_vars):
        # Marginalize out 2 variables
        xx = np.linspace(-1, 1, 101)
        inactive_idx = np.hstack(
            (np.arange(ii), np.arange(ii + 1, num_vars)))
        marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
            poly, inactive_idx, center=True)
        mvals = marginalized_pce(xx[None, :])
        # reference: an independently built 1D PCE over the remaining
        # variable with unit coefficients
        variable_ii = variable.all_variables()[ii:ii + 1]
        var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
        poly_ii = pya.PolynomialChaosExpansion()
        poly_opts_ii = \
            pya.define_poly_options_from_variable_transformation(
                var_trans_ii)
        poly_ii.configure(poly_opts_ii)
        indices_ii = compute_hyperbolic_indices(1, degree, 1.)
        poly_ii.set_indices(indices_ii)
        poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
        pvals = poly_ii(xx[None, :])
        # import matplotlib.pyplot as plt
        # plt.plot(xx, pvals)
        # plt.plot(xx, mvals, '--')
        # plt.show()
        assert np.allclose(mvals, pvals - poly.mean())
        # the 1D PCE's variance fraction equals the main effect of var ii
        assert np.allclose(poly_ii.variance() / poly.variance(),
                           pce_main_effects[ii])
        poly_ii.coefficients /= np.sqrt(poly.variance())
        assert np.allclose(poly_ii.variance(), pce_main_effects[ii])

        # Marginalize out 1 variable
        xx = pya.cartesian_product([xx] * 2)
        inactive_idx = np.array([ii])
        marginalized_pce = pya.marginalize_polynomial_chaos_expansion(
            poly, inactive_idx, center=True)
        mvals = marginalized_pce(xx)
        # reference: a 2D PCE over the two remaining variables
        variable_ii = variable.all_variables()[:ii] +\
            variable.all_variables()[ii+1:]
        var_trans_ii = pya.AffineRandomVariableTransformation(variable_ii)
        poly_ii = pya.PolynomialChaosExpansion()
        poly_opts_ii = \
            pya.define_poly_options_from_variable_transformation(
                var_trans_ii)
        poly_ii.configure(poly_opts_ii)
        indices_ii = pya.compute_hyperbolic_indices(2, degree, 1.)
        poly_ii.set_indices(indices_ii)
        poly_ii.set_coefficients(np.ones((indices_ii.shape[1], 1)))
        pvals = poly_ii(xx)
        assert np.allclose(mvals, pvals - poly.mean())
def _cross_validate_pce_degree(pce, train_samples, train_vals, min_degree=1,
                               max_degree=3, hcross_strength=1,
                               linear_solver_options=None,
                               solver_type='lasso', verbose=0):
    """
    Private helper: sweep hyperbolic-cross bases over
    ``range(min_degree, max_degree+1)`` and keep the degree with the
    lowest cross validation score.

    Returns the modified ``pce``, the best cv score, the best degree and
    the regularization parameter selected at the best degree (``None`` if
    no degree was ever fit).
    """
    assert train_vals.shape[1] == 1
    if linear_solver_options is None:
        # avoid a shared mutable default argument
        linear_solver_options = {'cv': 10}
    if min_degree is None:
        min_degree = 2
    if max_degree is None:
        max_degree = np.iinfo(int).max - 1

    best_coef = None
    best_cv_score = np.finfo(np.double).max  # scores are minimized here
    best_degree = min_degree
    # guard: if the term-budget break below fires before any fit,
    # best_reg_param would otherwise be unbound at the return statement
    best_reg_param = None
    prev_num_terms = 0
    if verbose > 0:
        print("{:<8} {:<10} {:<18}".format(
            'degree', 'num_terms', 'cv score',))
    rng_state = np.random.get_state()
    for degree in range(min_degree, max_degree + 1):
        indices = compute_hyperbolic_indices(
            pce.num_vars(), degree, hcross_strength)
        pce.set_indices(indices)
        # stop growing once the basis overshoots the 100000-term budget by
        # more than the previous basis undershot it
        if ((pce.num_terms() > 100000) and
                (100000 - prev_num_terms < pce.num_terms() - 100000)):
            break
        basis_matrix = pce.basis_matrix(train_samples)
        # use the same state (thus cross validation folds) for each degree
        np.random.set_state(rng_state)
        coef, cv_score, reg_param = fit_linear_model(
            basis_matrix, train_vals, solver_type, **linear_solver_options)
        np.random.set_state(rng_state)
        pce.set_coefficients(coef)
        if verbose > 0:
            print("{:<8} {:<10} {:<18} ".format(
                degree, pce.num_terms(), cv_score))
        # stop when the score has not improved for more than one degree
        # past the best degree (checked before updating the best score)
        if ((cv_score >= best_cv_score) and (degree - best_degree > 1)):
            break
        if (cv_score < best_cv_score):
            best_cv_score = cv_score
            best_coef = coef.copy()
            best_degree = degree
            best_reg_param = reg_param
        prev_num_terms = pce.num_terms()

    pce.set_indices(compute_hyperbolic_indices(
        pce.num_vars(), best_degree, hcross_strength))
    pce.set_coefficients(best_coef)
    if verbose > 0:
        print('best degree:', best_degree)
    return pce, best_cv_score, best_degree, best_reg_param