def computeSystemMatrixForVarianceProjected(self, gs, gpsi, basisi, gpsj,
                                            basisj, dist, trans, dims):
    """Assemble the system-matrix contribution for the variance, with the
    density projected onto the dimensions in ``dims``.

    Returns a tuple ``(A, err)``: the matrix of integrals of basis-function
    products weighted by the density, and the accumulated integration error.
    """
    if isinstance(dist, SGDEdist):
        # The density is itself a sparse grid function: project its grid
        # onto the current dimensions and evaluate the trilinear form.
        gpsk, basisk = project(dist.grid, list(range(len(dims))))
        # Use a uniform measure here — the density is already encoded in
        # the sparse grid coefficients dist.alpha.
        self.trilinearForm.setDistributionAndTransformation(
            [Uniform(0, 1)] * gs.getDimension(), None)
        return self.trilinearForm.computeTrilinearFormByList(
            gs, gpsk, basisk, dist.alpha, gpsi, basisi, gpsj, basisj)

    # Analytic density: integrate it directly with the basis functions.
    if isinstance(dist, Dist) and len(dims) > 1:
        # print "WARNINING: results are just approximated -> not independent random variables"
        # Multivariate analytic density: marginalize it per dimension and
        # continue with the list of one-dimensional densities.
        dist = [dist.marginalizeToDimX(idim) for idim in dims]
        trans = trans.getTransformations()

    if isinstance(dist, Dist):
        # Single one-dimensional density: wrap in lists for the bilinear form.
        dist = [dist]
        trans = [trans]

    self.bilinearForm.setDistributionAndTransformation(dist, trans)
    return self.bilinearForm.computeBilinearFormByList(
        gs, gpsi, basisi, gpsj, basisj)
def computeSystemMatrixForMeanProjected(self, gs, gpsi, basisi, dist, trans, dims):
    """Assemble the system-matrix contribution for the mean, with the
    density projected onto the dimensions in ``dims``.

    Returns a tuple ``(b, err)``: the vector of basis-function integrals
    weighted by the density, and the accumulated integration error.
    """
    if isinstance(dist, SGDEdist):
        # The density is a sparse grid function, so the integral becomes a
        # bilinear form between the two grids.
        assert len(dims) == dist.grid.getStorage().getDimension()
        gpsj, basisj = project(dist.grid, list(range(len(dims))))
        # Use a uniform measure here — the density is already encoded in
        # the sparse grid coefficients dist.alpha.
        self.bilinearForm.setDistributionAndTransformation(
            [Uniform(0, 1)] * gs.getDimension(), None)
        A, err = self.bilinearForm.computeBilinearFormByList(
            gs, gpsi, basisi, gpsj, basisj)
        # Weight the bilinear form with the density coefficients.
        return A.dot(dist.alpha), err

    # Analytic density: integrate it directly with the basis functions.
    if isinstance(dist, Dist) and len(dims) > 1:
        # print "WARNINING: results are just approximated -> not independent random variables"
        # Multivariate analytic density: marginalize it per dimension and
        # continue with the list of one-dimensional densities.
        dist = [dist.marginalizeToDimX(idim) for idim in dims]
        trans = trans.getTransformations()

    if isinstance(dist, Dist):
        # Single one-dimensional density: wrap in lists for the linear form.
        dist = [dist]
        trans = [trans]

    self.linearForm.setDistributionAndTransformation(dist, trans)
    return self.linearForm.computeLinearFormByList(gs, gpsi, basisi)
def run_densityEstimation(functionName,
                          method,
                          kfold=20,
                          numDims=2,
                          numSamples=1000,
                          candidates="join",
                          bandwidthOptimizationType=BandwidthOptimizationType_SILVERMANSRULE,
                          out=True,
                          plot=False,
                          tikz=False):
    """Run k-fold cross-validated density estimation for one test function.

    For each fold, estimates a density with the chosen ``method`` ("sgde_*",
    "*kde*" or "*nataf*"), records the cross entropy on a fixed validation
    set, draws samples from the estimated density and runs per-dimension
    Kolmogorov-Smirnov tests of the transformed validation samples against
    Uniform(0, 1) — once with and once without shuffling.

    Results are accumulated in ``stats`` (one dict per fold) and, if ``out``
    is set, written to ``data/<method>/`` as a CSV of cross entropies and a
    pickle of the full statistics after every fold.
    """
    if method == "sgde_zero":
        interpolation = "zero"
    else:  # interpolation == "boundaries":
        interpolation = "boundaries"

    samples, bounds, natafType = load_data_set(functionName, numSamples, numDims)

    # do kfold cross validation; the validation split is fixed across folds
    crossEntropyValidation = np.zeros((kfold, 2))
    learnSamples, validationSamples = splitset(samples, splitPercentage=0.7)
    stats = {}
    for i in range(kfold):
        print("=" * 100)
        print("run (%s)= %i/%i" % (method, i + 1, kfold))
        print("=" * 100)
        print("valid: %i x %i (mean=%g, var=%g)" % (validationSamples.shape[0],
                                                    validationSamples.shape[1],
                                                    np.mean(validationSamples),
                                                    np.var(validationSamples)))
        # per-fold seed so each fold's train/test split is reproducible
        np.random.seed(i * 123456 + i % 2)
        trainSamples, testSamples = splitset(learnSamples,
                                             splitPercentage=1. - 1. / kfold)

        if "sgde" in method:
            dist, stats[i] = estimateSGDEDensity(functionName,
                                                 trainSamples,
                                                 testSamples,
                                                 bounds=bounds,
                                                 iteration=i,
                                                 plot=plot,
                                                 label=method,
                                                 out=out,
                                                 candidates=candidates,
                                                 interpolation=interpolation)
        elif "kde" in method:
            dist, stats[i] = estimateKDEDensity(
                functionName,
                trainSamples,
                testSamples,
                iteration=i,
                plot=plot,
                label=method,
                out=out,
                bandwidthOptimizationTypeStr=bandwidthOptimizationType)
        elif "nataf" in method:
            # estimate nataf density
            dist, stats[i] = estimateNatafDensity(functionName,
                                                  natafType,
                                                  testSamples,
                                                  iteration=i,
                                                  bounds=bounds,
                                                  plot=plot,
                                                  label=method,
                                                  out=out)
        else:
            raise AttributeError("unknown config '%s'" % method)

        # evaluate the distribution according to the validation set
        crossEntropyValidation[i, 0] = i
        crossEntropyValidation[i, 1] = dist.crossEntropy(validationSamples)
        stats[i]["crossEntropyValidation"] = dist.crossEntropy(
            validationSamples)
        stats[i]["validationSamples"] = validationSamples
        stats[i]["samples"] = {"shuffled": {}, "not_shuffled": {}}

        # --- KS goodness-of-fit, shuffled sampling ---
        stats[i]["samples"]["shuffled"]["rvs"] = dist.rvs(numSamples,
                                                          shuffle=True)
        stats[i]["samples"]["shuffled"]["uniform_validation"] = dist.cdf(
            validationSamples, shuffle=True)
        kstests = [None] * numDims
        for idim in range(numDims):
            samples1d = stats[i]["samples"]["shuffled"][
                "uniform_validation"][:, idim]
            res_test = kstest(samples1d, Uniform(0, 1).cdf)
            kstests[idim] = res_test.statistic, res_test.pvalue
            if plot:
                plt.figure()
                # NOTE: matplotlib removed the 'normed' kwarg (3.1+);
                # 'density' is the direct replacement
                plt.hist(samples1d, cumulative=True, density=True)
                xs = np.linspace(0, 1, 10)
                plt.plot(xs, [Uniform(0, 1).cdf(xi) for xi in xs])
                plt.title("shuffled: %i, %s" % (idim, kstests[idim]))
        print("-" * 80)
        print("shuffled ", kstests, np.min(kstests), np.max(kstests))
        if plot:
            plt.show()
        stats[i]["samples"]["shuffled"]["kstests"] = kstests

        # --- KS goodness-of-fit, non-shuffled sampling ---
        stats[i]["samples"]["not_shuffled"]["rvs"] = dist.rvs(numSamples,
                                                              shuffle=False)
        stats[i]["samples"]["not_shuffled"]["uniform_validation"] = dist.cdf(
            validationSamples, shuffle=False)
        kstests = [None] * numDims
        for idim in range(numDims):
            samples1d = stats[i]["samples"]["not_shuffled"][
                "uniform_validation"][:, idim]
            res_test = kstest(samples1d, Uniform(0, 1).cdf)
            kstests[idim] = res_test.statistic, res_test.pvalue
            if plot:
                plt.figure()
                plt.hist(samples1d, cumulative=True, density=True)
                xs = np.linspace(0, 1, 1000)
                plt.plot(xs, [Uniform(0, 1).cdf(xi) for xi in xs])
                plt.title("not shuffled: %i, %s" % (idim, kstests[idim]))
        print("not shuffled", kstests, np.min(kstests), np.max(kstests))
        if plot:
            plt.show()
        stats[i]["samples"]["not_shuffled"]["kstests"] = kstests

        print("CV valid = %g" % crossEntropyValidation[i, 1])

        # write results to file after every fold so partial runs are kept
        if out:
            out_crossEntropy = os.path.join(
                "data", method,
                "%s.%s.validation.cross_entropies.csv" % (method, functionName))
            # BUGFIX: was [:i, :], which dropped the current fold's row
            # (and wrote an empty file on the first iteration)
            np.savetxt(out_crossEntropy, crossEntropyValidation[:i + 1, :])

            # save stats to pickle
            # BUGFIX: pickle requires a binary-mode file in Python 3;
            # the context manager also guarantees the handle is closed
            out_stats = os.path.join(
                "data", method,
                "%s.%s.best.stats.pkl" % (method, functionName))
            with open(out_stats, "wb") as fd:
                pkl.dump(stats, fd)