def test2DCovarianceMatrix(self):
    # prepare data
    C = np.array([[0.1, 0.08, 0.02],
                  [0.08, 0.1, 0.02],
                  [0.02, 0.02, 0.1]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5, 0.5], C, 0, 1)
    samples = U.rvs(20000)
    dist = KDEDist(samples,
                   kernelType=KernelType_EPANECHNIKOV,
                   bounds=U.getBounds())

    # check the estimated covariance and correlation matrices
    self.assertTrue(np.linalg.norm(C - dist.cov()) < 1e-2,
                    "KDE cov wrong")
    self.assertTrue(np.linalg.norm(np.corrcoef(samples.T) - dist.corrcoeff()) < 1e-1,
                    "KDE corrcoef wrong")
def test2DNormalMoments(self):
    mean = 0
    var = 0.5
    U = dists.J([dists.Normal(mean, var, -2, 2),
                 dists.Normal(mean, var, -2, 2)])
    trainSamples = U.rvs(10000)
    dist = KDEDist(trainSamples)
    # -----------------------------------------------
    self.assertTrue(np.abs(U.mean() - dist.mean()) < 1e-2, "KDE mean wrong")
    self.assertTrue(np.abs(U.var() - dist.var()) < 1e-2, "KDE variance wrong")
def test2DCDFandPPF(self):
    # prepare data
    C = np.array([[0.1, 0.08],
                  [0.08, 0.1]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
    train_samples = U.rvs(1000)

    fig = plt.figure()
    plotDensity2d(U)
    plt.title('true density')
    fig.show()

    dist = KDEDist(train_samples, bounds=U.getBounds())

    fig = plt.figure()
    plotDensity2d(dist)
    plt.title('estimated KDE density')
    fig.show()

    # draw uniform samples and push them through ppf and cdf
    samples = dists.J([dists.Uniform(0, 1),
                       dists.Uniform(0, 1)]).rvs(1000)

    fig = plt.figure()
    plt.plot(samples[:, 0], samples[:, 1], "o ")
    plt.title('u space')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    fig.show()

    transformed_samples = dist.ppf(samples)

    fig = plt.figure()
    plt.plot(transformed_samples[:, 0], transformed_samples[:, 1], "o ")
    plt.title('x space (transformed)')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    fig.show()

    samples = dist.cdf(transformed_samples)

    fig = plt.figure()
    plt.plot(samples[:, 0], samples[:, 1], "o ")
    plt.title('u space (transformed)')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    fig.show()

    plt.show()
def test1DNormalDist(self):
    # prepare data
    U = dists.Normal(1.85, .3, 0, 3)
    trainSamples = np.array([U.rvs(500)]).T
    testSamples = np.array([U.rvs(1000)]).T

    # build parameter set
    dist = KDEDist(trainSamples,
                   kernelType=KernelType_GAUSSIAN,
                   bandwidthOptimizationType=BandwidthOptimizationType_MAXIMUMLIKELIHOOD,
                   bounds=U.getBounds())

    # fig = plt.figure()
    # plotDensity1d(U)
    # plotDensity1d(dist)

    print("quad = %s" % (quad(lambda x: dist.pdf([x]), 0, 3), ))
    print("mean = %g ~ %g" % (U.mean(), dist.mean()))
    print("var  = %g ~ %g" % (U.var(), dist.var()))
    print("KL   = %g" % U.klDivergence(dist, testSamples, testSamples))
    print("CE   = %g" % dist.crossEntropy(testSamples))
    print("MSE  = %g" % dist.l2error(U, testSamples, testSamples))

    plt.show()
def test1DCDFandPPF(self):
    # prepare data
    U = dists.Normal(0.5, 0.1, 0, 1)
    train_samples = U.rvs(1000).reshape(1000, 1)
    dist = KDEDist(train_samples, kernelType=KernelType_EPANECHNIKOV)

    rc('font', **{'size': 18})

    # compare the estimated and the analytic cdf
    fig = plt.figure()
    x = np.linspace(0, 1, 1000)
    plt.plot(x, dist.cdf(x), label="estimated")
    plt.plot(x, [U.cdf(xi) for xi in x], label="analytic")
    plt.legend(loc="lower right")
    fig.show()

    fig = plt.figure()
    plt.hist(train_samples, density=True)  # 'normed' was removed in matplotlib >= 3.1
    plotDensity1d(U, label="analytic")
    plotDensity1d(dist, label="estimated")
    plt.title("original space")
    plt.legend()
    fig.show()

    # transform the samples to the uniform space via the cdf ...
    transformed_samples = dist.cdf(train_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, density=True)
    plt.title("uniform space")
    fig.show()

    # ... and back to the original space via the ppf
    transformed_samples = dist.ppf(transformed_samples)

    fig = plt.figure()
    plt.hist(transformed_samples, density=True)
    plotDensity1d(U, label="analytic")
    plotDensity1d(dist, label="estimated")
    plt.title("original space")
    plt.legend()
    fig.show()

    plt.show()
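# A quick numeric round-trip check (a sketch, assuming the same KDEDist API as
# used above; the tolerance is a guess and depends on the accuracy of the
# numeric ppf inversion) that could complement the visual comparison in
# test1DCDFandPPF:
#
#   u = dist.cdf(train_samples)   # original -> uniform space
#   x = dist.ppf(u)               # uniform  -> original space
#   assert np.max(np.abs(x - train_samples)) < 1e-6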
def test2DMarginalize(self):
    # prepare data
    C = np.array([[0.2, 0.08],
                  [0.08, 0.2]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)

    fig = plt.figure()
    plotDensity2d(U)
    plt.title('true density')
    fig.show()

    samples = U.rvs(1000)
    kde = KDEDist(samples)

    # fig = plt.figure()
    # plotDensity2d(kde)
    # plt.title('estimated KDE density')
    # fig.show()

    # marginalize
    opMarg = createOperationDensityMarginalizeKDE(kde.dist)

    # keep a single dimension
    kdeX = kde.marginalizeToDimX(0)
    kdeY = kde.marginalizeToDimX(1)

    fig = plt.figure()
    plotDensity1d(kdeX)
    plotDensity1d(kdeY)
    plt.title('margToDimX densities')
    fig.show()

    # marginalize a single dimension out
    kdeX = kde.marginalize(1)
    kdeY = kde.marginalize(0)

    fig = plt.figure()
    plotDensity1d(kdeX)
    plotDensity1d(kdeY)
    plt.title('doMarginalize densities')
    fig.show()

    plt.show()
def test2DPPF(self):
    # prepare data
    C = np.array([[0.1, 0.08],
                  [0.08, 0.1]]) / 10.
    U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)

    fig = plt.figure()
    plotDensity2d(U)
    plt.title('true density')
    fig.show()

    dist = KDEDist(U.rvs(1000),
                   kernelType=KernelType_EPANECHNIKOV,
                   bounds=U.getBounds())

    fig = plt.figure()
    plotDensity2d(dist)
    plt.title('estimated KDE density')
    fig.show()

    samples = dists.J([dists.Uniform(0, 1),
                       dists.Uniform(0, 1)]).rvs(1000)

    fig = plt.figure()
    plt.plot(samples[:, 0], samples[:, 1], "o ")
    plt.title('uniformly drawn samples')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    fig.show()

    transformed_samples = dist.ppf(samples)

    fig = plt.figure()
    plt.plot(transformed_samples[:, 0], transformed_samples[:, 1], "o ")
    plt.title('transformed samples')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    fig.show()

    plt.show()
def estimateKDEDensity(functionName,
                       trainSamples,
                       testSamples=None,
                       iteration=0,
                       plot=False,
                       out=True,
                       label="kde_gaussian",
                       bandwidthOptimizationTypeStr="rot"):
    print("train: %i x %i (mean=%g, var=%g)" % (trainSamples.shape[0],
                                                trainSamples.shape[1],
                                                np.mean(trainSamples),
                                                np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" % (testSamples.shape[0],
                                                    testSamples.shape[1],
                                                    np.mean(testSamples),
                                                    np.var(testSamples)))

    if "gaussian" in label:
        kernelType = KernelType_GAUSSIAN
    elif "epanechnikov" in label:
        kernelType = KernelType_EPANECHNIKOV
    else:
        raise AttributeError("label is unknown")

    bandwidthOptimizationType = strTobandwidthOptimizationType(
        bandwidthOptimizationTypeStr)

    kdeDist = KDEDist(trainSamples,
                      kernelType=kernelType,
                      bandwidthOptimizationType=bandwidthOptimizationType)

    # define the output path before its first use; assigning it only inside
    # the 'if out' branch below would raise an UnboundLocalError in the
    # plotting block
    pathResults = os.path.join("data", label)

    # -----------------------------------------------------------
    cvKDE = kdeDist.crossEntropy(testSamples)

    if plot and kdeDist.getDim() == 2:
        fig = plt.figure()
        plotDensity2d(kdeDist)
        plt.title("log=%g" % cvKDE)
        if out:
            plt.tight_layout()
            plt.savefig(os.path.join(pathResults,
                                     "kde_dist.%s.i%i.jpg" % (functionName, iteration)))
            plt.savefig(os.path.join(pathResults,
                                     "kde_dist.%s.i%i.pdf" % (functionName, iteration)))
            plt.close(fig)
        else:
            plt.show()

    print("CV test = %g" % cvKDE)
    # -----------------------------------------------------------
    if out:
        # serialize cross entropies
        out_crossEntropies = os.path.join(
            pathResults, "kde_cross_entropies.%s.i%i.csv" % (functionName, iteration))
        # open in text mode; binary mode ('wb') breaks csv.writer in Python 3
        fd = open(out_crossEntropies, 'w', newline='')
        file_writer = csv.writer(fd)
        file_writer.writerow(["crossEntropy"])
        file_writer.writerow([cvKDE])
        fd.close()

        # serialize samples
        np.savetxt(os.path.join(pathResults,
                                "kde_train_samples.%s.i%i.csv" % (functionName, iteration)),
                   trainSamples)
        np.savetxt(os.path.join(pathResults,
                                "kde_test_samples.%s.i%i.csv" % (functionName, iteration)),
                   testSamples)

        if plot:
            # plot density
            fig = plt.figure()
            plotDensity2d(kdeDist)
            plt.title("%s -> CV = %g" % (kdeDist.getBandwidths(), cvKDE))
            plt.savefig(os.path.join(pathResults,
                                     "kde_pdf.%s.i%i.jpg" % (functionName, iteration)))
            plt.close(fig)

        # serialize best configuration to json
        out_bestDist = os.path.join(
            pathResults, "kde_best_config.%s.i%i.json" % (functionName, iteration))
        text = kdeDist.toJson()
        fd = open(out_bestDist, "w")
        fd.write(text)
        fd.close()

    # stats
    stats = {
        'config': {
            'functionName': functionName,
            'numDims': trainSamples.shape[1],
            'label': label,
            # store the optimization type that was actually used instead of a
            # hard-coded constant
            'bandwidth_optimization': bandwidthOptimizationType,
            'kernelType': kernelType,
            'iteration': iteration
        },
        'trainSamples': trainSamples,
        'testSamples': testSamples,
        'crossEntropyTrainKDE': kdeDist.crossEntropy(trainSamples),
        'crossEntropyTestKDE': cvKDE,
        'KDEDist_json': kdeDist.toJson()
    }

    return kdeDist, stats
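# A minimal usage sketch for estimateKDEDensity (an assumption, not part of the
# original module; the function name "mult_normal" is a hypothetical placeholder,
# and out=False avoids writing to the "data/<label>" results directory):
#
#   C = np.array([[0.1, 0.08], [0.08, 0.1]]) / 10.
#   U = dists.MultivariateNormal([0.5, 0.5], C, 0, 1)
#   kdeDist, stats = estimateKDEDensity("mult_normal", U.rvs(2000),
#                                       testSamples=U.rvs(1000),
#                                       plot=False, out=False,
#                                       label="kde_epanechnikov",
#                                       bandwidthOptimizationTypeStr="rot")
#   print(stats['crossEntropyTestKDE'])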
def __init__(self, data, sample_type=None, dist=None):
    from pysgpp.extensions.datadriven.uq.dists import Uniform, Beta, SGDEdist, Normal, KDEDist
    from pysgpp.extensions.datadriven.uq.quadrature.marginalization.marginalization import doMarginalize

    # fix stochastic setting
    self.alpha, self.beta = 5., 10.
    self.lwr, self.upr = 0., 1.
    self.normal = Normal(0, 1, -2, 2)
    self.uniform = Uniform(self.lwr, self.upr)
    self.b = Beta(self.alpha, self.beta, self.lwr, self.upr)
    self.dim = data.shape[0]

    if sample_type == 'cbeta':
        # marginalize the density to the single dimensions
        opMar = createOperationDensityMargTo1DKDE(dist.dist)
        kdex = KernelDensityEstimator()
        opMar.margToDimX(kdex, 0)
        kdey = KernelDensityEstimator()
        opMar.margToDimX(kdey, 1)

        # set the mean vector and the correlation matrix
        self.x = [KDEDist(kdex.getSamples().array()),
                  KDEDist(kdey.getSamples().array())]
        self.M = np.array([[kdex.mean(), kdey.mean()]]).T
        self.S = dist.corrcoeff()
    else:
        self.x = [self.b, self.b]
        self.M = np.array([[self.b.mean(), self.b.mean()]]).T
        self.S = np.array([[1., 0.],
                           [0., 1.]])

    # compute the correlation matrix from the covariance matrix;
    # this is used to transform the results back to the original space
    self.D = np.diag(np.sqrt(np.diag(self.S)))
    # the inverse scaling: reciprocals of the standard deviations on the diagonal
    self.D_inverse = np.diag(1. / np.sqrt(np.diag(self.S)))
    self.C = self.D_inverse.dot(self.S.dot(self.D_inverse))

    # fig = plt.figure()
    # plotDensity1d(self.x[0])
    # plotDensity1d(self.b)
    # fig.show()
    #
    # fig = plt.figure()
    # plotDensity1d(self.x[1])
    # plotDensity1d(self.b)
    # fig.show()

    # compute the Cholesky decomposition of the correlation matrix
    self.L = np.linalg.cholesky(self.C)

    # adjust it according to [Lu ...]:
    # nothing needs to be done for uniform <--> uniform
    self.L_inverse = np.linalg.inv(self.L)

    assert abs(np.sum(self.C - self.L.dot(self.L.T))) < 1e-14
    assert abs(np.sum(self.S - self.D.dot(self.L.dot(self.L.T.dot(self.D))))) < 1e-14
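# A standalone sketch (assumption: illustration only, plain numpy, values are
# made up) of the identities asserted above: with D = diag(std devs) and
# C = D^{-1} S D^{-1}, the Cholesky factor L of C satisfies C = L L^T and
# S = D L L^T D:
#
#   S = np.array([[0.02, 0.008], [0.008, 0.02]])  # covariance matrix
#   D = np.diag(np.sqrt(np.diag(S)))              # standard deviations
#   D_inverse = np.diag(1. / np.sqrt(np.diag(S)))
#   C = D_inverse.dot(S).dot(D_inverse)           # correlation matrix
#   L = np.linalg.cholesky(C)
#   assert np.allclose(C, L.dot(L.T))
#   assert np.allclose(S, D.dot(L).dot(L.T).dot(D))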