def cv_vs_error_study( build_pts, build_vals, domain, test_pts, test_vals, results_file = None, cv_file = None, solver_type = 2 ): num_dims = build_pts.shape[0] if ( num_dims == 10 ): max_order = 5 elif ( num_dims == 15 ): max_order = 4 else: max_order = 3 poly_1d = [ LegendrePolynomial1D() ] basis = TensorProductBasis( num_dims, poly_1d ) pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain ) orders = numpy.arange( 1, max_order + 1 ) solvers = numpy.array( [solver_type], numpy.int32 ) cv_params_grid_array = cartesian_product( [solvers,orders] ) cv_params_grid = [] for i in xrange( cv_params_grid_array.shape[0] ): cv_params = {} cv_params['solver'] = numpy.int32( cv_params_grid_array[i,0] ) cv_params['order'] = numpy.int32( cv_params_grid_array[i,1] ) num_pce_terms = polynomial_space_dimension( num_dims, cv_params['order'] ) if ( cv_params['solver'] <= 1 and num_pce_terms >= build_pts.shape[1] ): cv_params['lambda'] = 1.e-12 cv_params_grid.append( cv_params ) # print cv_params_grid # cv_iterator = LeaveOneOutCrossValidationIterator() cv_iterator = KFoldCrossValidationIterator( num_folds = 20 ) CV = GridSearchCrossValidation( cv_iterator, pce, use_predictor_cross_validation = True, use_fast_predictor_cross_validation = True ) t0 = time.time() CV.run( build_pts, build_vals, cv_params_grid ) time_taken = time.time() - t0 print 'cross validation took ', time_taken, ' seconds' print "################" print "Best cv params: ", CV.best_cv_params print "Best cv score: ", CV.best_score print "################" for order in orders: residual_norms = numpy.empty( len( CV.cv_params_set ), numpy.double ) scores = numpy.empty( len( CV.cv_params_set ), numpy.double ) k = 0 for i in xrange( len( CV.cv_params_set ) ): if ( CV.cv_params_set[i]['order'] == order ): residual_norms[k] = CV.cv_params_set[i]['norm_residual'] scores[k] = CV.scores[i] k += 1 residual_norms.resize( k ) scores.resize( k ) pce = PCE( num_dims, order = order, basis = basis, func_domain = domain ) V = pce.vandermonde( build_pts ).T pce.set_solver( CV.best_cv_params['solver'] ) # pce.linear_solver.max_iterations = 3 sols, sol_metrics = pce.linear_solver.solve( V, build_vals ) from sklearn.linear_model import orthogonal_mp l2_error = numpy.empty( ( sols.shape[1] ), numpy.double ) residuals = numpy.empty( ( sols.shape[1] ), numpy.double ) test_pts = numpy.random.uniform( 0., 1., ( num_dims, 1000 ) ) f = GenzModel( domain, 'oscillatory' ) # f.set_coefficients( 4.5, 'no-decay' ) f.set_coefficients( 4.5, 'quadratic-decay' ) test_vals = f( test_pts ).reshape( ( test_pts.shape[1], 1 ) ) for i in xrange( sols.shape[1] ): coeff = sols[:,i] pce.set_coefficients( coeff ) residuals[i] = numpy.linalg.norm( build_vals - pce.evaluate_set( build_pts ) ) num_test_pts = test_pts.shape[1] pce_vals_pred = pce.evaluate_set( test_pts ).T error = test_vals.squeeze() - pce_vals_pred l2_error[i] = numpy.linalg.norm( error ) / numpy.sqrt( num_test_pts ) import pylab print residuals, l2_error print residual_norms, scores pylab.loglog( residuals, l2_error, label = str( order ) + 'true' ) pylab.loglog( residual_norms, scores, label = str( order )+'-cv' ) pylab.xlim([1e-3,10]) pylab.legend() pylab.show()
def pce_study( build_pts, build_vals, domain, test_pts, test_vals, results_file = None, cv_file = None, solver_type = 2 ): num_dims = build_pts.shape[0] index_generator = IndexGenerator() poly_1d = [ LegendrePolynomial1D() ] basis = TensorProductBasis( num_dims, poly_1d ) pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain ) if ( solver_type == 1 ): num_folds = build_pts.shape[1] else: num_folds = 20 index_norm_orders = numpy.linspace( 0.4, 1.0, 4 ) #if (solver_tupe == 1): # index_norm_orders = [.4,.5,.6,.7,.8,.9,1.] #solvers = numpy.array( [solver_type], numpy.int32 ) #cv_params_grid_array = cartesian_product( [solvers,orders] ) cv_params_grid = [] for index_norm_order in index_norm_orders: level = 2 # determine what range of orders to consider. # spefically consider any order that results in a pce with terms <= 3003 while ( True ): #index_generator.set_parameters( num_dims, level, # index_norm_order = index_norm_order) indices = index_generator.get_isotropic_indices( num_dims, level, index_norm_order ) num_indices = len( indices ) print level, index_norm_order, len ( indices ) if ( num_indices > 3003 ): break cv_params = {} cv_params['solver'] = solver_type cv_params['order'] = level cv_params['index_norm_order'] = index_norm_order if ( cv_params['solver'] > 1 or num_indices <= build_pts.shape[1] ): # only do least squares on over-determined systems cv_params_grid.append( cv_params ) level += 1 print cv_params_grid # cv_iterator = LeaveOneOutCrossValidationIterator() cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds ) CV = GridSearchCrossValidation( cv_iterator, pce, use_predictor_cross_validation = True, use_fast_predictor_cross_validation = True ) t0 = time.time() CV.run( build_pts, build_vals, cv_params_grid ) time_taken = time.time() - t0 print 'cross validation took ', time_taken, ' seconds' print "################" print "Best cv params: ", CV.best_cv_params print "Best cv score: ", CV.best_score print "################" #for i in xrange( len( CV.cv_params_set ) ): # print CV.cv_params_set[i], CV.scores[i] best_order = CV.best_cv_params['order'] best_index_norm_order = CV.best_cv_params['index_norm_order'] best_pce = PCE( num_dims, order = best_order, basis = basis, func_domain = domain, index_norm_order = best_index_norm_order) V = best_pce.vandermonde( build_pts ).T best_pce.set_solver( CV.best_cv_params['solver'] ) if cv_params['solver'] != 1 and cv_params['solver'] != 5: best_res_tol = CV.best_cv_params['norm_residual'] best_pce.linear_solver.residual_tolerance = best_res_tol sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals ) coeff = sols[:,-1] best_pce.set_coefficients( coeff ) error = abs( build_vals - best_pce.evaluate_set( build_pts ) ) print max( error ) print 'Evaluating best pce at test points' num_test_pts = test_pts.shape[1] pce_vals_pred = best_pce.evaluate_set( test_pts ).T print test_vals.shape, pce_vals_pred.shape error = test_vals.squeeze() - pce_vals_pred linf_error = numpy.max( numpy.absolute( error ) ) l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts ) mean = numpy.mean( pce_vals_pred ) var = numpy.var( pce_vals_pred ) pce_mean = best_pce.mean() pce_var = best_pce.variance() if results_file is not None: results_file.write( '%1.15e' %linf_error + ',' + '%1.15e' %l2_error + ',' + '%1.15e' %mean + ',' + '%1.15e' %var + ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n') print "linf error: ", linf_error print "l2 error: ", l2_error print "mean: ", mean print "var: ", var print "pce mean: ", pce_mean print "pce var: ", pce_var
def pce_study( build_pts, build_vals, domain, test_pts, test_vals, results_file = None, cv_file = None, solver_type = 2 ): num_dims = build_pts.shape[0] index_generator = IndexGenerator() poly_1d = [ LegendrePolynomial1D() ] basis = TensorProductBasis( num_dims, poly_1d ) pce = PCE( num_dims, order = 0, basis = basis, func_domain = domain ) if ( solver_type == 1 ): num_folds = build_pts.shape[1] else: num_folds = 20 index_norm_orders = numpy.linspace( 0.4, 1.0, 4 ) #solvers = numpy.array( [solver_type], numpy.int32 ) #cv_params_grid_array = cartesian_product( [solvers,orders] ) cv_params_grid = [] for index_norm_order in index_norm_orders: level = 2 # determine what range of orders to consider. # spefically consider any order that results in a pce with terms <= 3003 while ( True ): index_generator.set_parameters( num_dims, level, index_norm_order = index_norm_order ) index_generator.build_isotropic_index_set() print level, index_norm_order, index_generator.num_indices if ( index_generator.num_indices > 3003 ): break cv_params = {} cv_params['solver'] = solver_type cv_params['order'] = level cv_params['index_norm_order'] = index_norm_order if ( cv_params['solver'] > 1 or index_generator.num_indices <= build_pts.shape[1] ): # only do least squares on over-determined systems cv_params_grid.append( cv_params ) else: break level += 1 print cv_params_grid # cv_iterator = LeaveOneOutCrossValidationIterator() cv_iterator = KFoldCrossValidationIterator( num_folds = num_folds ) CV = GridSearchCrossValidation( cv_iterator, pce, use_predictor_cross_validation = True, use_fast_predictor_cross_validation = True ) t0 = time.time() CV.run( build_pts, build_vals, cv_params_grid ) time_taken = time.time() - t0 print 'cross validation took ', time_taken, ' seconds' print "################" print "Best cv params: ", CV.best_cv_params print "Best cv score: ", CV.best_score print "################" #for i in xrange( len( CV.cv_params_set ) ): # print CV.cv_params_set[i], CV.scores[i] best_order = CV.best_cv_params['order'] best_index_norm_order = CV.best_cv_params['index_norm_order'] best_pce = PCE( num_dims, order = best_order, basis = basis, func_domain = domain, index_norm_order = best_index_norm_order) V = best_pce.vandermonde( build_pts ).T best_pce.set_solver( CV.best_cv_params['solver'] ) if cv_params['solver'] > 1 : best_res_tol = CV.best_cv_params['norm_residual'] best_pce.linear_solver.residual_tolerance = best_res_tol sols, sol_metrics = best_pce.linear_solver.solve( V, build_vals ) coeff = sols[:,-1] best_pce.set_coefficients( coeff ) error = abs( build_vals - best_pce.evaluate_set( build_pts ) ) print max( error ) print 'Evaluating best pce at test points' num_test_pts = test_pts.shape[1] pce_vals_pred = best_pce.evaluate_set( test_pts ).T print test_vals.shape, pce_vals_pred.shape error = test_vals.squeeze() - pce_vals_pred linf_error = numpy.max( numpy.absolute( error ) ) l2_error = numpy.sqrt( numpy.dot( error.T, error ) / num_test_pts ) mean = numpy.mean( pce_vals_pred ) var = numpy.var( pce_vals_pred ) pce_mean = best_pce.mean() pce_var = best_pce.variance() if results_file is not None: results_file.write( '%1.15e' %linf_error + ',' + '%1.15e' %l2_error + ',' + '%1.15e' %mean + ',' + '%1.15e' %var + ',%1.15e' %pce_mean + ',' + '%1.15e' %pce_var + '\n') print "linf error: ", linf_error print "l2 error: ", l2_error print "mean: ", mean print "var: ", var print "pce mean: ", pce_mean print "pce var: ", pce_var me, te, ie = best_pce.get_sensitivities() interaction_values, interaction_terms = best_pce.get_interactions() show = False fignum = 1 filename = 'oscillator-individual-interactions.png' plot_interaction_values( interaction_values, interaction_terms, title = 'Sobol indices', truncation_pct = 0.95, filename = filename, show = show, fignum = fignum ) fignum += 1 filename = 'oscillator-dimension-interactions.png' plot_interaction_effects( ie, title = 'Dimension-wise joint effects', truncation_pct = 0.95, filename = filename, show = show,fignum = fignum ) fignum += 1 filename = 'oscillator-main-effects.png' plot_main_effects( me, truncation_pct = 0.95, title = 'Main effect sensitivity indices', filename = filename, show = show, fignum = fignum ) fignum += 1 filename = 'oscillator-total-effects.png' plot_total_effects( te, truncation_pct = 0.95, title = 'Total effect sensitivity indices', filename = filename, show = show, fignum = fignum ) fignum += 1 from scipy.stats.kde import gaussian_kde pylab.figure( fignum ) pce_kde = gaussian_kde( pce_vals_pred ) pce_kde_x = numpy.linspace( pce_vals_pred.min(), pce_vals_pred.max(), 100 ) pce_kde_y = pce_kde( pce_kde_x ) pylab.plot( pce_kde_x, pce_kde_y,label = 'pdf of surrogate' ) true_kde = gaussian_kde( test_vals ) true_kde_x = numpy.linspace( test_vals.min(), test_vals.max(), 100 ) true_kde_y = true_kde( true_kde_x ) pylab.plot( true_kde_x, true_kde_y, label = 'true pdf' ) pylab.legend(loc=2) pylab.show()