def testExp2d(self):
    """Estimate an SGDE from the samples in ``exp_2d.csv`` and show it in 3d.

    The training samples are drawn into the same axes at height zero and
    the title reports ``dist.vol``.
    """
    samples = np.loadtxt("exp_2d.csv").T

    # learner configuration
    sgde_config = {
        "grid_level": 7,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 0.000562341,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": False,
        "sgde_makePositive": True,
        "sgde_makePositive_candidateSearchAlgorithm": "joined",
        "sgde_makePositive_interpolationAlgorithm": "interpolateBoundaries1d",
        "sgde_unitIntegrand": True,
    }
    dist = SGDEdist.byLearnerSGDEConfig(samples, config=sgde_config)

    fig, ax, _ = plotDensity3d(dist)
    heights = np.zeros(samples.shape[0])
    ax.scatter(samples[:, 0], samples[:, 1], heights)
    ax.set_title("vol=%.12f" % dist.vol)
    fig.show()
    plt.show()
def test1DNormalDist(self):
    """Learn a 1d SGDE from ``dists.TNormal`` samples and compare it with the analytic density.

    Plots both densities and prints the relative mean/variance errors with
    respect to the training samples, the KL divergence, the cross entropy
    and the l2 error on the test samples.
    """
    # prepare data
    U = dists.TNormal(0.5, .2, -1, 2)
    np.random.seed(1234567)
    trainSamples = np.array([U.rvs(1000)]).T
    testSamples = np.array([U.rvs(1000)]).T

    # learner configuration (regularization is chosen by cross validation)
    sgde_config = {
        "grid_level": 6,
        "grid_type": "modlinear",
        "grid_maxDegree": 3,
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "solver_threshold": 1e-10,
        "solver_verbose": True,
        "regularization_type": "Laplace",
        "crossValidation_enable": True,
        "crossValidation_kfold": 5,
        "crossValidation_silent": False,
        "sgde_makePositive": False,
        "sgde_makePositive_candidateSearchAlgorithm": "fullGrid",
        "sgde_makePositive_interpolationAlgorithm": "setToZero",
        "sgde_makePositive_verbose": True,
        "sgde_unitIntegrand": False,
    }
    dist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                        config=sgde_config,
                                        bounds=np.array([U.getBounds()]))

    plt.figure()
    plotDensity1d(U, label="analytic")
    plotDensity1d(dist, label="sgde")
    plt.legend()

    # empirical moments of the training data serve as reference values
    sample_mean = np.mean(trainSamples)
    print("1d: mean = %g ~ %g (err=%g)" %
          (sample_mean,
           dist.mean(),
           np.abs(sample_mean - dist.mean()) / sample_mean))
    sample_var = np.var(trainSamples)
    print("1d: var = %g ~ %g (err=%g)" %
          (sample_var,
           dist.var(),
           np.abs(sample_var - dist.var()) / sample_var))

    print("KL = %g" % U.klDivergence(dist, testSamples, testSamples))
    print("CE = %g" % dist.crossEntropy(testSamples))
    print("MSE = %g" % dist.l2error(U, testSamples, testSamples))

    plt.show()
def test2DNormalDist(self):
    """Learn a 2d SGDE from samples of two independent ``dists.Normal`` marginals.

    Plots the analytic density and the sparse grid function, prints mean
    and variance of both distributions and, after ``plt.show()`` returns,
    the KL divergence, cross entropy and l2 error on the test samples.
    """
    # prepare data
    # (an earlier, immediately overwritten definition of U was removed;
    # it had no effect on the samples because the seed is set below)
    U = dists.J([dists.Normal(0.5, .5, -1, 2),
                 dists.Normal(0.5, .4, -1, 2)])

    np.random.seed(1234567)
    trainSamples = U.rvs(300)
    testSamples = U.rvs(1000)

    # build parameter set
    dist = SGDEdist.byLearnerSGDEConfig(
        trainSamples,
        config={
            "grid_level": 5,
            "grid_type": "modlinear",
            "refinement_numSteps": 0,
            "refinement_numPoints": 10,
            "regularization_type": "Laplace",
            "crossValidation_lambda": 0.000562341,
            "crossValidation_enable": False,
            "crossValidation_kfold": 5,
            "crossValidation_silent": False,
            "sgde_makePositive": False,
            "sgde_makePositive_candidateSearchAlgorithm": "joined",
            "sgde_makePositive_interpolationAlgorithm": "setToZero",
            "sgde_makePositive_generateConsistentGrid": False,
            "sgde_makePositive_verbose": True,
            "sgde_unitIntegrand": True
        },
        bounds=U.getBounds())

    # analytic density
    fig = plt.figure()
    plotDensity2d(U)
    fig.show()

    # sparse grid function including its negative regions and grid points
    fig = plt.figure()
    plotSG2d(dist.grid,
             dist.alpha,
             addContour=True,
             show_negative=True,
             show_grid_points=True)
    fig.show()

    print("2d: mean = %g ~ %g" % (U.mean(), dist.mean()))
    print("2d: var = %g ~ %g" % (U.var(), dist.var()))
    plt.show()

    print("KL = %g" % U.klDivergence(dist, testSamples, testSamples))
    print("CE = %g" % dist.crossEntropy(testSamples))
    print("MSE = %g" % dist.l2error(U, testSamples, testSamples))
def test2DPPF(self):
    """Push uniformly drawn 2d samples through ``dist.ppf`` of a learned SGDE.

    Shows four figures: the true density, the estimated density, the
    uniform samples and the transformed samples.
    """

    def show_density(density, title):
        # one figure per density
        figure = plt.figure()
        plotDensity2d(density)
        plt.title(title)
        figure.show()

    def show_points(points, title):
        # scatter plot restricted to the unit square
        figure = plt.figure()
        plt.plot(points[:, 0], points[:, 1], "o ")
        plt.title(title)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        figure.show()

    # prepare data
    C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
    train_samples = U.rvs(1000)

    show_density(U, 'true density')

    sgde_config = {
        "grid_level": 5,
        "grid_type": "linear",
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 0.000562341,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
    }
    dist = SGDEdist.byLearnerSGDEConfig(train_samples,
                                        config=sgde_config,
                                        bounds=U.getBounds())

    show_density(dist, 'estimated SGDE density')

    uniform = dists.J([dists.Uniform(0, 1), dists.Uniform(0, 1)])
    samples = uniform.rvs(1000)
    show_points(samples, 'uniformly drawn samples')

    transformed_samples = dist.ppf(samples)
    show_points(transformed_samples, 'transformed samples')

    plt.show()
def test1DCDFandPPF(self):
    """Map 1d samples through ``dist.cdf`` and back through ``dist.ppf`` and plot each stage.

    Three histograms are shown: the training samples, their image under
    the CDF of the learned SGDE, and the result of applying the PPF to
    that image. The first and the last figure also contain the analytic
    and the estimated density.
    """
    # prepare data
    U = Normal(0.5, 0.1, 0, 1)
    train_samples = U.rvs(1000).reshape(1000, 1)

    dist = SGDEdist.byLearnerSGDEConfig(train_samples,
                                        config={
                                            "grid_level": 5,
                                            "grid_type": "poly",
                                            "refinement_numSteps": 0,
                                            "refinement_numPoints": 10,
                                            "regularization_type": "Laplace",
                                            "crossValidation_lambda": 0.000562341,
                                            "crossValidation_enable": False,
                                            "crossValidation_kfold": 5,
                                            "crossValidation_silent": True
                                        },
                                        bounds=U.getBounds())

    # `density=True` replaces the `normed=True` keyword, which current
    # Matplotlib releases no longer accept
    fig = plt.figure()
    plt.hist(train_samples, bins=10, density=True)
    plotDensity1d(U)
    plotDensity1d(dist)
    plt.title("original space")
    fig.show()

    transformed_samples = dist.cdf(train_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, bins=10, density=True)
    plt.title("uniform space")
    fig.show()

    transformed_samples = dist.ppf(transformed_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, bins=10, density=True)
    plotDensity1d(U)
    plotDensity1d(dist)
    plt.title("original space")
    fig.show()

    plt.show()
def testExpPoly2d(self):
    """Plot an SGDE (``modpoly`` basis) and a KDE of the samples in ``exp_2d.csv`` in 3d."""
    samples = np.loadtxt("exp_2d.csv").T

    # sparse grid density estimation
    sgde_config = {
        "grid_level": 4,
        "grid_type": "modpoly",
        "grid_maxDegree": 6,
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "solver_threshold": 1e-10,
        "solver_verbose": True,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 0.000562341,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False,
        "sgde_makePositive_candidateSearchAlgorithm": "joined",
        "sgde_makePositive_interpolationAlgorithm": "setToZero",
        "sgde_makePositive_verbose": True,
        "sgde_unitIntegrand": True,
    }
    dist_sgde = SGDEdist.byLearnerSGDEConfig(samples, config=sgde_config)

    # kernel density estimation
    dist_kde = dists.KDEDist(
        samples,
        kernelType=KernelType_GAUSSIAN,
        bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE)

    _fig, _, _ = plotDensity3d(dist_sgde)
    plt.title("SGDE: vol=%g" % dist_sgde.vol)

    _fig, _, _ = plotDensity3d(dist_kde)
    plt.title("KDE: vol=1.0")

    plt.show()
def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    """Learn SGDEs for several grid settings, make them positive and return the best one.

    For every level in 2..6 and every number of refinement steps in 0..4
    an SGDE is learned from ``trainSamples``, a positive copy is built with
    ``createOperationMakePositive`` and the positive density is rated by
    its cross entropy on ``testSamples``. The candidate with the smallest
    cross entropy is kept.

    :param functionName: stored in ``stats['config']['functionName']``
    :param trainSamples: 2d array of training samples
    :param testSamples: 2d array the cross entropies are evaluated on
    :param bounds: forwarded to the ``SGDEdist`` constructors
    :param iteration: integer that is stored in the stats and used in all
        file names
    :param plot: if true, figures are created for two-dimensional grids;
        the figures of the currently best positive density are saved even
        if ``out`` is false
    :param out: if true, the stats, all grids, coefficients, configurations,
        cross entropies and samples are written to ``data/<label>``
    :param label: sub directory of ``data`` and prefix of the figure files
    :param candidates: name handed to ``strToCandidateSearchAlgorithm``
    :param interpolation: name handed to ``strToInterpolationAlgorithm``
    :return: tuple ``(bestDist, stats)``
    """
    print("train: %i x %i (mean=%g, var=%g)" %
          (trainSamples.shape[0], trainSamples.shape[1],
           np.mean(trainSamples), np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" %
              (testSamples.shape[0], testSamples.shape[1],
               np.mean(testSamples), np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    config = {
        "grid_level": 1,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 3,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_enable": True,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False
    }

    pathResults = os.path.join("data", label)

    def saveFigure(stem):
        # every figure is stored as jpg and as pdf
        for extension in ("jpg", "pdf"):
            plt.savefig(os.path.join(pathResults,
                                     "%s.%s" % (stem, extension)))

    key = 1
    bestCV = float("Inf")
    bestDist = None

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': 2,
            'adaptive': True,
            'refnums': 0,
            'consistentGrid': True,
            'candidateSearchAlgorithm': candidates,
            'interpolationAlgorithm': interpolation,
            'maxNumGridPoints': 0,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples
    }

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps

            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # -----------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)

            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print(" " + "-" * 30)
            print(" #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))
            # -----------------------------------------------------------
            # make it positive (on a copy, the learned density is kept)
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(
                candidateSearchAlgorithm, interpolationAlgorithm, True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            # scale to unit integrand
            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid,
                                        positiveAlpha,
                                        trainSamples,
                                        bounds=bounds)
            # -----------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            if plot and numDims == 2:
                fig = plt.figure()
                plotSG2d(grid, alpha, show_negative=True,
                         show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid, positiveAlpha),
                           cvPositiveSgde))
                plt.tight_layout()
                if out:
                    saveFigure("%s_density_pos_i%i_l%i_r%i" %
                               (label, iteration, level, refinementSteps))
                else:
                    plt.close(fig)
            # -----------------------------------------------------------
            print(" positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # -----------------------------------------------------------
            # select the best density available based on the given criterion.
            # The config is copied: the dict is modified in place in every
            # iteration, so storing it directly would make all results share
            # the settings of the last run.
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1
            candidateSearch = opPositive.getCandidateSetAlgorithm()

            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # update the stats -> just for the current best one
                # C[d - 2] = binom(M, d) for d = 2..numDims, where M is the
                # number of negative coefficients of the learned density
                C = np.ndarray(numDims - 1, dtype="int")
                M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0])
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2**maxLevel - 1)**numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = \
                    sgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = \
                    positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(
                    candidateSearch.numCandidatesPerLevel().array(),
                    dtype="int")
                stats['numCandidatesPerIteration'] = np.array(
                    candidateSearch.numCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearch'] = \
                    candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(
                    candidateSearch.costsComputingCandidatesPerIteration().array(),
                    dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    fig = plt.figure()
                    plotSG2d(
                        positiveGrid,
                        positiveAlpha,
                        show_negative=True,
                        show_grid_points=False,
                        colorbarLabel=r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$")
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2**maxLevel - 1)**numDims,
                               numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    plt.tight_layout()
                    saveFigure("%s_pos_i%i_l%i_r%i" %
                               (label, iteration, level, refinementSteps))
                    if out:
                        plt.close(fig)

                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(
                        r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                        fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)
                    plt.tight_layout()
                    saveFigure("%s_pos_i%i_l%i_r%i_3d" %
                               (label, iteration, level, refinementSteps))
                    if out:
                        plt.close(fig)

            if plot and numDims == 2 and not out:
                plt.show()

    if out:
        # save stats; numDims and refinementSteps hold the values of the
        # last loop iteration here
        filename = os.path.join(
            pathResults,
            "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
            (numDims, 1, refinementSteps, iteration, candidates,
             interpolation))
        # pickle produces bytes -> the file has to be opened in binary mode
        with open(filename, "wb") as fd:
            pkl.dump(stats, fd)
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {
            "Grid": {
                "dimNames": ["phi", "log(K_A)"],
                "matrixEntries": ["phi", "log(K_A)"]
            },
            "Set": {
                "path": "",
                "grids": [],
                "alphas": [],
                "paramValues": [],
                "paramName": "grid_size"
            }
        }

        # one csv row per result: (cross entropy, grid, alpha, config)
        rows = []
        for resultKey, result in results.items():
            resultConfig = result['config']
            dist = result['dist']

            # serialize grid and coefficients
            prefix = "sgde.i%i.k%i.N%i" % (iteration, resultKey,
                                           dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % prefix)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % prefix)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[resultKey])
            # -----------------------------------------------------------
            # serialize the config
            out_config = os.path.join(
                pathResults,
                "sgde.i%i.k%i.config" % (iteration, resultKey))
            with open(out_config, "w") as fd:
                json.dump(resultConfig, fd, ensure_ascii=True, indent=True)

            rows.append((crossEntropies[resultKey], out_grid, out_alpha,
                         out_config))

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        for name in ("grids", "alphas", "paramValues"):
            myjson["Set"][name] = [myjson["Set"][name][ix] for ix in ixs]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        with open(out_config, "w") as fd:
            json.dump(myjson, fd, ensure_ascii=True, indent=True)

        # serialize cross entropies; the csv module expects a text file
        # opened with newline=''
        out_crossEntropies = os.path.join(
            pathResults, "sgde_cross_entropies.i%i.csv" % iteration)
        with open(out_crossEntropies, "w", newline="") as fd:
            file_writer = csv.writer(fd)
            file_writer.writerow(
                ["crossEntropy", "grid", "alpha", "sgdeConfig"])
            file_writer.writerows(rows)

        # serialize samples
        np.savetxt(
            os.path.join(pathResults,
                         "sgde_train_samples.i%i.csv" % iteration),
            trainSamples)
        if testSamples is not None:
            np.savetxt(
                os.path.join(pathResults,
                             "sgde_test_samples.i%i.csv" % iteration),
                testSamples)

        # serialize best configuration to json; bestDist stays None if no
        # cross entropy was smaller than infinity (e.g. all of them NaN)
        if bestDist is not None:
            out_bestDist = os.path.join(
                pathResults, "sgde_best_config.i%i.json" % iteration)
            with open(out_bestDist, "w") as fd:
                fd.write(bestDist.toJson())

    return bestDist, stats
def test2DCovarianceMatrix(self):
    """Compare covariance and correlation of a KDE with the analytic and empirical values.

    An SGDE of the same samples is learned as well, its marginals are
    plotted and it is sent through a ``toJson``/``Dist.fromJson`` round
    trip before plotting. Only the KDE results are asserted.
    """

    def show_density(density, title):
        # one contour figure per density
        plt.figure()
        plotDensity2d(density, addContour=True)
        plt.title(title)

    # prepare data
    np.random.seed(1234567)
    C = np.array([[0.3, 0.09], [0.09, 0.3]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
    samples = U.rvs(2000)

    kde = KDEDist(samples)

    sgde_config = {
        "grid_level": 5,
        "grid_type": "linear",
        "grid_maxDegree": 1,
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "solver_threshold": 1e-10,
        "solver_verbose": False,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 3.16228e-06,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": False,
        "sgde_makePositive": True,
        "sgde_makePositive_candidateSearchAlgorithm": "joined",
        "sgde_makePositive_interpolationAlgorithm": "setToZero",
        # NOTE(review): test2DNormalDist spells this key
        # "sgde_makePositive_generateConsistentGrid" -- confirm which name
        # the learner actually reads
        "sgde_generateConsistentGrid": True,
        "sgde_unitIntegrand": True,
    }
    sgde = SGDEdist.byLearnerSGDEConfig(samples,
                                        bounds=U.getBounds(),
                                        config=sgde_config)

    # one-dimensional marginals of the SGDE
    sgde_x1 = sgde.marginalizeToDimX(0)
    sgde_x2 = sgde.marginalizeToDimX(1)

    plt.figure()
    plotDensity1d(sgde_x1, label="x1")
    plotDensity1d(sgde_x2, label="x2")
    plt.title("mean: x1=%g, x2=%g; var: x1=%g, x2=%g" %
              (sgde_x1.mean(), sgde_x2.mean(), sgde_x1.var(), sgde_x2.var()))
    plt.legend()

    # serialization round trip
    sgde = Dist.fromJson(json.loads(sgde.toJson()))

    show_density(U, "analytic")
    show_density(kde, "kde")
    show_density(sgde,
                 "sgde (I(f) = %g)" % (doQuadrature(sgde.grid, sgde.alpha), ))

    # print the results
    print("E(x) ~ %g ~ %g" % (kde.mean(), sgde.mean()))
    print("V(x) ~ %g ~ %g" % (kde.var(), sgde.var()))
    print("-" * 60)
    print(kde.cov())
    print(sgde.cov())

    cov_error = np.linalg.norm(C - kde.cov())
    self.assertTrue(cov_error < 1e-2, "KDE cov wrong")

    corr_error = np.linalg.norm(np.corrcoef(samples.T) - kde.corrcoeff())
    self.assertTrue(corr_error < 1e-1, "KDE corrcoef wrong")

    plt.show()
def test2DCDFandPPF(self, plot=True):
    """Send uniform 2d samples through ``dist.ppf`` and back through ``dist.cdf``.

    :param plot: if true, every stage is shown as a figure; otherwise the
        three sample sets are printed
    """

    def show_density(density, title):
        figure = plt.figure()
        plotDensity2d(density)
        plt.title(title)
        figure.show()

    def show_points(points, title):
        # scatter plot restricted to the unit square
        figure = plt.figure()
        plt.plot(points[:, 0], points[:, 1], "o ")
        plt.title(title)
        plt.xlim(0, 1)
        plt.ylim(0, 1)
        figure.show()

    def dump(points):
        print("-" * 80)
        print(points)

    # prepare data
    C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
    train_samples = U.rvs(1000)

    if plot:
        show_density(U, 'true density')

    sgde_config = {
        "grid_level": 5,
        "grid_type": "polyClenshawCurtis",
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 0.000562341,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": False,
    }
    dist = SGDEdist.byLearnerSGDEConfig(train_samples,
                                        config=sgde_config,
                                        bounds=U.getBounds())

    if plot:
        show_density(dist, 'estimated SGDE density')

    uniform = dists.J([dists.Uniform(0, 1), dists.Uniform(0, 1)])
    samples = uniform.rvs(500)

    if plot:
        show_points(samples, 'u space')
    else:
        dump(samples)

    transformed_samples = dist.ppf(samples, shuffle=False)

    if plot:
        show_points(transformed_samples, 'x space (transformed)')
    else:
        dump(transformed_samples)

    samples = dist.cdf(transformed_samples, shuffle=False)

    if plot:
        show_points(samples, 'u space (transformed)')
        plt.show()
    else:
        dump(samples)
def test2DNormalMoments(self):
    """Assert that mean and variance of a learned 2d SGDE match the analytic distribution.

    Afterwards the SGDE is compared with a KDE of the same samples by
    printed moments, cross entropies and covariances and by several plots.
    """

    def show_density(density, title, points=None):
        # contour figure, optionally with samples drawn on top
        plt.figure()
        plotDensity2d(density, addContour=True)
        if points is not None:
            plt.scatter(points[:, 0], points[:, 1])
        plt.title(title)

    mean = 0
    var = 0.5
    marginals = [dists.Normal(mean, var, -2, 2),
                 dists.Normal(mean, var, -2, 2)]
    U = dists.J(marginals)

    np.random.seed(1234567)
    trainSamples = U.rvs(1000)

    sgde_config = {
        "grid_level": 5,
        "grid_type": "linear",
        "refinement_numSteps": 0,
        "refinement_numPoints": 10,
        "regularization_type": "Laplace",
        "crossValidation_lambda": 0.000562341,
        "crossValidation_enable": False,
        "crossValidation_kfold": 5,
        "crossValidation_silent": True,
        "sgde_makePositive": True,
    }
    dist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                        config=sgde_config,
                                        bounds=U.getBounds())
    samples_dist = dist.rvs(1000, shuffle=True)

    kde = KDEDist(trainSamples)
    samples_kde = kde.rvs(1000, shuffle=True)
    # -----------------------------------------------
    mean_error = np.abs(U.mean() - dist.mean())
    self.assertTrue(mean_error < 1e-2, "SGDE mean wrong")
    var_error = np.abs(U.var() - dist.var())
    self.assertTrue(var_error < 4e-2, "SGDE variance wrong")
    # -----------------------------------------------

    # print the results
    print("E(x) ~ %g ~ %g" % (kde.mean(), dist.mean()))
    print("V(x) ~ %g ~ %g" % (kde.var(), dist.var()))
    print("log ~ %g ~ %g" % (kde.crossEntropy(trainSamples),
                             dist.crossEntropy(trainSamples)))
    print("-" * 60)

    print(dist.cov())
    print(kde.cov())

    # marginals of the first dimension
    sgde_x1 = dist.marginalizeToDimX(0)
    kde_x1 = kde.marginalizeToDimX(0)

    plt.figure()
    plotDensity1d(U.getDistributions()[0], label="analytic")
    plotDensity1d(sgde_x1, label="sgde")
    plotDensity1d(kde_x1, label="kde")
    plt.title("mean: sgde=%g, kde=%g; var: sgde=%g, kde=%g" %
              (sgde_x1.mean(), kde_x1.mean(), sgde_x1.var(), kde_x1.var()))
    plt.legend()

    show_density(U, "analytic")
    show_density(kde, "kde", points=samples_kde)

    plt.figure()
    plotDensity2d(dist, addContour=True)
    plt.scatter(samples_dist[:, 0], samples_dist[:, 1])
    integral = np.prod(U.getBounds()) * doQuadrature(dist.grid, dist.alpha)
    plt.title("sgde (I(f) = %g)" % (integral, ))

    plt.show()
# -------------------- prepare data
# samples of a two-dimensional MultivariateNormal with correlated components
C = np.array([[0.1, 0.08],
              [0.08, 0.1]]) / 10.
m = np.array([0.5, 0.5])
U = MultivariateNormal(m, C, 0, 1)

np.random.seed(12345)
samples = U.rvs(1000)
testSamples = U.rvs(1000)

# ---------- using SGDE from SG++ ------------------------
dist = SGDEdist.byLearnerSGDEConfig(samples,
                                    config={"grid_level": 6,
                                            "grid_type": "Linear",
                                            "refinement_numSteps": 0,
                                            "refinement_numPoints": 3,
                                            "regularization_type": "Laplace",
                                            "crossValidation_lambda": 0.000562341,
                                            "crossValidation_enable": False,
                                            "crossValidation_kfold": 5,
                                            "crossValidation_silent": False},
                                    bounds=U.getBounds())

# plotDensity3d is unpacked into three values everywhere else in this
# file, so the third return value has to be consumed here as well
fig, ax, _ = plotDensity3d(U)
ax.set_title("true density")
fig.show()

fig, ax, _ = plotSG3d(dist.grid, dist.alpha)
ax.set_title("estimated density")
fig.show()

print("mean = %g ~ %g" % (m.prod(), dist.mean()))
print("var = %g ~ %g" % (np.var(testSamples), dist.var()))
def test_sgdeLaplace():
    """Plot the l2 error of SGDEs on several grid types and of a KDE over the number of samples.

    For every entry of ``sample_range`` training samples are drawn from a
    two-dimensional joint of ``dists.Lognormal`` marginals. An SGDE is
    learned for each grid type and a KDE is built once; the l2 error of
    every estimate with respect to the true distribution is recorded and
    finally plotted on a logarithmic axis.
    """
    l2_samples = 10000
    sample_range = [10, 20, 50, 100, 200, 500]
    grids = ["linear",
             "modlinear",
             # no OperationQuadrature
             "poly",
             "modpoly",
             "polyBoundary",
             "polyClenshawCurtis",
             "modPolyClenshawCurtis",
             "polyClenshawCurtisBoundary",
             "bsplineClenshawCurtis",
             "modBsplineClenshawCurtis"
             # no OperationMultipleEval
             ]

    U = dists.J([dists.Lognormal.by_alpha(0.5, 0.1, 0.001),
                 dists.Lognormal.by_alpha(0.5, 0.1, 0.001)])

    # grid sizes and errors per grid type, one entry per sample size;
    # the grid sizes are collected but not plotted
    points = {grid_name: [] for grid_name in grids}
    l2_errors = {grid_name: [] for grid_name in grids}
    l2_errors["kde"] = []

    for samples in sample_range:
        trainSamples = U.rvs(samples)
        for grid_name in grids:
            # build parameter set
            print("--------------------Samples: {} Grid: {}--------------------".format(samples, grid_name))
            dist_sgde = SGDEdist.byLearnerSGDEConfig(
                trainSamples,
                bounds=U.getBounds(),
                unitIntegrand=True,
                config={"grid_level": 1,
                        "grid_type": grid_name,
                        "grid_maxDegree": 6,
                        "refinement_numSteps": 0,
                        "refinement_numPoints": 10,
                        "solver_threshold": 1e-10,
                        "solver_verbose": False,
                        "regularization_type": "Laplace",
                        "crossValidation_lambda": 1e-6,
                        "crossValidation_enable": True,
                        "crossValidation_kfold": 4,
                        "crossValidation_lambdaSteps": 10,
                        "crossValidation_silent": False})
            points[grid_name].append(dist_sgde.grid.getSize())
            l2_errors[grid_name].append(dist_sgde.l2error(U, n=l2_samples))

        dist_kde = dists.KDEDist(
            trainSamples,
            kernelType=KernelType_GAUSSIAN,
            bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE)
        # The KDE error is evaluated the same way as the SGDE error above.
        # The former call passed `testSamples`, which is never assigned in
        # this function (its assignment is commented out).
        l2_errors["kde"].append(dist_kde.l2error(U, n=l2_samples))

    for grid_name in grids:
        plt.plot(sample_range, l2_errors[grid_name], label=grid_name)
    plt.plot(sample_range, l2_errors["kde"], label="KDE")

    # NOTE(review): the axis labels are German ("# grid points",
    # "L2 error"); the x values plotted here are the sample counts
    plt.xlabel("# Gitterpunkte")
    plt.ylabel("L2-Fehler")
    plt.yscale("log")
    plt.legend()
    plt.show()