def sampleGrids(self, filename):
    ts = self.__uqManager.getTimeStepsOfInterest()

    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')

    for t in ts:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)

        # init
        gs = grid.getStorage()
        dim = gs.getDimension()

        # -----------------------------------------
        # do full grid sampling of sparse grid function
        # -----------------------------------------
        data = eval_fullGrid(4, dim)
        res = evalSGFunctionMulti(grid, surplus, data)

        data = np.vstack((data.T, res)).T

        # write results
        data_vec = DataMatrix(data)
        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': data_vec,
                       'names': names})
        del data_vec

        # -----------------------------------------
        # write sparse grid points to file
        # -----------------------------------------
        data = np.ndarray((gs.getSize(), dim))
        x = DataVector(dim)
        for i in range(gs.getSize()):
            gp = gs.getPoint(i)
            gs.getCoordinates(gp, x)
            data[i, :] = x.array()

        # write results
        data_vec = DataMatrix(data)
        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': data_vec,
                       'names': names})
        del data_vec

        # -----------------------------------------
        # write alpha
        # -----------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
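# Usage sketch (illustrative only; `analysis` stands for an instance of the
# surrounding analysis class, constructed elsewhere):
#
#   analysis.sampleGrids("results/mysim")
#   # writes, for every time step t of interest:
#   #   results/mysim.t<t>.samples.arff     (full-grid samples of the SG function)
#   #   results/mysim.t<t>.gridpoints.arff  (coordinates of the sparse grid points)
#   #   results/mysim.t<t>.alpha.arff       (hierarchical surpluses)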
def writeCheckpoints(self, filename):
    ts = self.__uqManager.getTimeStepsOfInterest()

    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')

    for iteration in range(self.__knowledge.getIteration() + 1):
        # myjson = {"Grid": {"dimNames": ["E", "K_1c", "rho", "n"],
        #                    "matrixEntries": ["E", "K_1c", "rho", "n"]},
        #           "Set": {"path": "",
        #                   "grids": [],
        #                   "alphas": [],
        #                   "paramValues": list(ts),
        #                   "paramName": "Time"}}
        myjson = {"Grid": {"dimNames": ["phi", "e", "K_L"],
                           "matrixEntries": ["phi", "e", "K_L"]},
                  "Set": {"path": "",
                          "grids": [],
                          "alphas": [],
                          "paramValues": list(ts),
                          "paramName": "Time"}}
        for t in ts:
            grid, surplus = self.__knowledge.getSparseGridFunction(
                self._qoi, t, iteration=iteration)
            out = "%s.t%f.i%i" % (filename, t, iteration)
            out_grid = "%s.grid" % out
            out_alpha = "%s.alpha.arff" % out
            writeGrid(out_grid, grid)
            writeAlphaARFF(out_alpha, surplus)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))

        # write json to file
        fd = open("%s.%i.json" % (filename, iteration), "w")
        json.dump(myjson, fd, indent=2)
        fd.close()
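# Usage sketch (illustrative only): one grid/surplus pair is serialized per
# time step and refinement iteration, plus a JSON index file per iteration.
#
#   analysis.writeCheckpoints("checkpoints/run1")
#   # -> checkpoints/run1.t<t>.i<k>.grid
#   # -> checkpoints/run1.t<t>.i<k>.alpha.arff
#   # -> checkpoints/run1.<k>.json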
def estimateSGDEDensity(functionName,
                        trainSamples,
                        testSamples=None,
                        bounds=None,
                        iteration=0,
                        plot=False,
                        out=True,
                        label="sgde_zero",
                        candidates="intersections",
                        interpolation="setToZero"):
    print("train: %i x %i (mean=%g, var=%g)" % (trainSamples.shape[0],
                                                trainSamples.shape[1],
                                                np.mean(trainSamples),
                                                np.var(trainSamples)))
    if testSamples is not None:
        print("test : %i x %i (mean=%g, var=%g)" % (testSamples.shape[0],
                                                    testSamples.shape[1],
                                                    np.mean(testSamples),
                                                    np.var(testSamples)))

    candidateSearchAlgorithm = strToCandidateSearchAlgorithm(candidates)
    interpolationAlgorithm = strToInterpolationAlgorithm(interpolation)

    results = {}
    crossEntropies = {}
    config = {"grid_level": 1,
              "grid_type": "linear",
              "grid_maxDegree": 1,
              "refinement_numSteps": 0,
              "refinement_numPoints": 3,
              "solver_threshold": 1e-10,
              "solver_verbose": False,
              "regularization_type": "Laplace",
              "crossValidation_enable": True,
              "crossValidation_kfold": 5,
              "crossValidation_silent": True,
              "sgde_makePositive": False}

    pathResults = os.path.join("data", label)

    key = 1
    bestCV = float("Inf")
    bestDist = None

    # stats
    stats = {'config': {'functionName': functionName,
                        'numDims': 2,
                        'adaptive': True,
                        'refnums': 0,
                        'consistentGrid': True,
                        'candidateSearchAlgorithm': candidates,
                        'interpolationAlgorithm': interpolation,
                        'maxNumGridPoints': 0,
                        'iteration': iteration},
             'trainSamples': trainSamples,
             'testSamples': testSamples}

    for level in range(2, 7):
        print("-" * 60)
        print("l=%i" % level)
        for refinementSteps in range(0, 5):
            config["grid_level"] = level
            config["refinement_numSteps"] = refinementSteps
            sgdeDist = SGDEdist.byLearnerSGDEConfig(trainSamples,
                                                    config=config,
                                                    bounds=bounds)
            # -----------------------------------------------------------
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            cvSgde = sgdeDist.crossEntropy(testSamples)
            maxLevel = grid.getStorage().getMaxLevel()
            numDims = grid.getStorage().getDimension()

            print(" " + "-" * 30)
            print(" #ref = %i: gs=%i -> CV test = %g" %
                  (refinementSteps, sgdeDist.grid.getSize(), cvSgde))

            # -----------------------------------------------------------
            # make it positive
            positiveGrid = grid.clone()
            positiveAlpha_vec = DataVector(alpha)
            opPositive = createOperationMakePositive(candidateSearchAlgorithm,
                                                     interpolationAlgorithm,
                                                     True, False)
            opPositive.makePositive(positiveGrid, positiveAlpha_vec, True)

            # scale to unit integrand
            positiveAlpha = positiveAlpha_vec.array()
            positiveSgdeDist = SGDEdist(positiveGrid, positiveAlpha,
                                        trainSamples, bounds=bounds)
            # -----------------------------------------------------------
            cvPositiveSgde = positiveSgdeDist.crossEntropy(testSamples)

            if plot and numDims == 2:
                fig = plt.figure()
                plotSG2d(grid, alpha,
                         show_negative=True, show_grid_points=True)
                plt.title("pos: N=%i: vol=%g, log=%g" %
                          (positiveGrid.getSize(),
                           doQuadrature(positiveGrid, positiveAlpha),
                           cvPositiveSgde))
                plt.tight_layout()
                if out:
                    plt.savefig(os.path.join(pathResults,
                                             "%s_density_pos_i%i_l%i_r%i.jpg" %
                                             (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(pathResults,
                                             "%s_density_pos_i%i_l%i_r%i.pdf" %
                                             (label, iteration, level, refinementSteps)))
                else:
                    plt.close(fig)
            # -----------------------------------------------------------
            print(" positive: gs=%i -> CV test = %g" %
                  (positiveGrid.getSize(), cvPositiveSgde))
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            # (copy the config dict: it is mutated in the next loop iteration)
            results[key] = {'config': dict(config), 'dist': positiveSgdeDist}
            crossEntropies[key] = cvPositiveSgde
            key += 1

            candidateSearch = opPositive.getCandidateSetAlgorithm()
            if cvPositiveSgde < bestCV:
                bestCV = cvPositiveSgde
                bestDist = positiveSgdeDist
                numComparisons = candidateSearch.costsComputingCandidates()

                # update the stats -> just for the current best one
                # write the stats of the current best results to the stats dict
                C = np.ndarray(numDims - 1, dtype="int")
                M = np.sum([1 for i in range(len(alpha)) if alpha[i] < 0])
                for d in range(2, numDims + 1):
                    C[d - 2] = binom(M, d)

                stats['config']['refnums'] = refinementSteps
                stats['config']['adaptive'] = refinementSteps > 0
                stats['negSGDE_json'] = sgdeDist.toJson()
                stats['posSGDE_json'] = positiveSgdeDist.toJson()
                stats['level'] = level
                stats['maxLevel'] = maxLevel
                stats['fullGridSize'] = (2 ** maxLevel - 1) ** numDims
                stats['sparseGridSize'] = grid.getSize()
                stats['discretizedGridSize'] = positiveGrid.getSize()
                stats['crossEntropyTrainZeroSGDE'] = sgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTrainDiscretizedSGDE'] = positiveSgdeDist.crossEntropy(trainSamples)
                stats['crossEntropyTestZeroSGDE'] = cvSgde
                stats['crossEntropyTestDiscretizedSGDE'] = cvPositiveSgde
                stats['numCandidates'] = int(candidateSearch.numCandidates())
                stats['numCandidatesPerLevel'] = np.array(candidateSearch.numCandidatesPerLevel().array(),
                                                          dtype="int")
                stats['numCandidatesPerIteration'] = np.array(candidateSearch.numCandidatesPerIteration().array(),
                                                              dtype="int")
                stats['costsCandidateSearch'] = candidateSearch.costsComputingCandidates()
                stats['costsCandidateSearchBinomial'] = int(C.sum())
                stats['costsCandidateSearchPerIteration'] = np.array(candidateSearch.costsComputingCandidatesPerIteration().array(),
                                                                     dtype="int")
                stats['costsCandidateSearchPerIterationBinomial'] = C

                if plot and numDims == 2:
                    fig = plt.figure()
                    plotSG2d(positiveGrid, positiveAlpha,
                             show_negative=True, show_grid_points=False,
                             colorbarLabel=r"$f_{\mathcal{I}^\text{SG} \cup \mathcal{I}^\text{ext}}$")
                    plt.title(r"positive: $N=%i/%i$; \# comparisons$=%i$" %
                              (positiveGrid.getSize(),
                               (2 ** maxLevel - 1) ** numDims,
                               numComparisons))
                    plt.xlabel(r"$\xi_1$")
                    plt.ylabel(r"$\xi_2$")
                    # plt.title(r"N=%i $\rightarrow$ %i: log=%g $\rightarrow$ %g" %
                    #           (sgdeDist.grid.getSize(),
                    #            positiveSgdeDist.grid.getSize(),
                    #            cvSgde, cvPositiveSgde))
                    plt.tight_layout()
                    plt.savefig(os.path.join(pathResults,
                                             "%s_pos_i%i_l%i_r%i.jpg" %
                                             (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(pathResults,
                                             "%s_pos_i%i_l%i_r%i.pdf" %
                                             (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

                    fig, ax, _ = plotSG3d(positiveGrid, positiveAlpha)
                    ax.set_zlabel(r"$f_{\mathcal{I}^{\text{SG}} \cup \mathcal{I}^\text{ext}}(\xi_1, \xi_2)$",
                                  fontsize=20)
                    ax.set_xlabel(r"$\xi_1$", fontsize=20)
                    ax.set_ylabel(r"$\xi_2$", fontsize=20)
                    plt.tight_layout()
                    plt.savefig(os.path.join(pathResults,
                                             "%s_pos_i%i_l%i_r%i_3d.jpg" %
                                             (label, iteration, level, refinementSteps)))
                    plt.savefig(os.path.join(pathResults,
                                             "%s_pos_i%i_l%i_r%i_3d.pdf" %
                                             (label, iteration, level, refinementSteps)))
                    if out:
                        plt.close(fig)

    if plot and numDims == 2 and not out:
        plt.show()

    if out:
        # save stats
        filename = os.path.join("data", label,
                                "stats_d%i_a%i_r%i_i%i_%s_%s.pkl" %
                                (numDims, 1, refinementSteps, iteration,
                                 candidates, interpolation))
        fd = open(filename, "wb")  # pickle requires a binary file handle
        pkl.dump(stats, fd)
        fd.close()
        print("stats saved to -> '%s'" % filename)

        # dictionary that stores the information on the estimated densities
        myjson = {"Grid": {"dimNames": ["phi", "log(K_A)"],
                           "matrixEntries": ["phi", "log(K_A)"]},
                  "Set": {"path": "",
                          "grids": [],
                          "alphas": [],
                          "paramValues": [],
                          "paramName": "grid_size"}}

        for key, result in list(results.items()):
            config = result['config']
            dist = result['dist']

            # serialize grid and coefficients
            out = "sgde.i%i.k%i.N%i" % (iteration, key, dist.grid.getSize())
            out_grid = os.path.join(pathResults, "%s.grid" % out)
            out_alpha = os.path.join(pathResults, "%s.alpha.arff" % out)
            writeGrid(out_grid, dist.grid)
            writeAlphaARFF(out_alpha, dist.alpha)

            # collect information for json
            myjson["Set"]["grids"].append(os.path.abspath(out_grid))
            myjson["Set"]["alphas"].append(os.path.abspath(out_alpha))
            myjson["Set"]["paramValues"].append(crossEntropies[key])
            # -----------------------------------------------------------
            # serialize the config
            out_config = os.path.join(pathResults,
                                      "sgde.i%i.k%i.config" % (iteration, key))
            fd = open(out_config, "w")
            json.dump(config, fd, ensure_ascii=True, indent=True)
            fd.close()

            crossEntropies[key] = (crossEntropies[key], out_grid,
                                   out_alpha, out_config)

        # sort the results in myjson according to the cross entropy
        ixs = np.argsort(myjson["Set"]["paramValues"])
        myjson["Set"]["grids"] = [myjson["Set"]["grids"][ix] for ix in ixs]
        myjson["Set"]["alphas"] = [myjson["Set"]["alphas"][ix] for ix in ixs]
        myjson["Set"]["paramValues"] = [myjson["Set"]["paramValues"][ix]
                                        for ix in ixs]

        # serialize myjson
        out_config = os.path.join(pathResults,
                                  "sgde_visualization.i%i.config" % iteration)
        fd = open(out_config, "w")
        json.dump(myjson, fd, ensure_ascii=True, indent=True)
        fd.close()

        # serialize cross entropies
        out_crossEntropies = os.path.join(pathResults,
                                          "sgde_cross_entropies.i%i.csv" % iteration)
        # csv.writer expects a text-mode file in Python 3
        fd = open(out_crossEntropies, "w", newline="")
        file_writer = csv.writer(fd)
        file_writer.writerow(["crossEntropy", "grid", "alpha", "sgdeConfig"])
        for out in list(crossEntropies.values()):
            file_writer.writerow(out)
        fd.close()

        # serialize samples
        np.savetxt(os.path.join(pathResults,
                                "sgde_train_samples.i%i.csv" % iteration),
                   trainSamples)
        np.savetxt(os.path.join(pathResults,
                                "sgde_test_samples.i%i.csv" % iteration),
                   testSamples)

        # serialize best configuration to json
        out_bestDist = os.path.join(pathResults,
                                    "sgde_best_config.i%i.json" % iteration)
        text = bestDist.toJson()
        fd = open(out_bestDist, "w")
        fd.write(text)
        fd.close()

    return bestDist, stats
def estimateDensitySGDE(trainSamplesUnit,
                        testSamplesUnit=None,
                        testSamplesProb=None,
                        pathResults="/tmp",
                        dist=None,
                        optimization='l2',
                        iteration=0,
                        levels=[1, 2, 3, 4, 5],
                        refNr=0, refPoints=0,
                        nSamples=1000):
    """
    Estimates a sparse grid density for different levels and refinements
    by optimizing over a given quantity.
    @param trainSamplesUnit: training samples in the unit hypercube
    @param testSamplesUnit: test samples in the unit hypercube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: directory where results are written
    @param dist: reference distribution for l2 error and KL divergence (optional)
    @param optimization: criterion to optimize ('l2', 'crossEntropy' or 'kldivergence')
    @param iteration: iteration counter, used in output file names
    @param levels: sparse grid levels to try
    @param refNr: number of refinement steps
    @param refPoints: number of points to refine per step
    @param nSamples: number of samples to draw from the estimated density
    """
    config = """
[general]
method = dmest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[dmest]
gridFile = %s
lambda = -1 # 0.01
regType=Laplace
refNr = %i
refPoints = %i
writeGridFile = %s
writeAlphaFile = %s
samp_rejectionTrialMax = 5000
samp_numSamples = %i
samp_outFile = %s
printSurfaceFile = %s"""

    # write the samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
        usingTrainTag = "%i" % dim
    else:
        n, dim = trainSamplesUnit.shape
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(pathResults,
                                        "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(pathResults,
                                           "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # collector arrays
    accGridSizes = np.array([])
    accLevels = np.array([])
    accL2error = np.array([])
    accCrossEntropy = np.array([])
    accKLDivergence = np.array([])

    # best estimation
    ans = None
    bestMeasure = 1e20
    bestSetting = None

    for level in levels:
        # define output files
        gridFile = os.path.join(pathResults,
                                "samples_%i_%i_l%i.grid" % (iteration, n, level))
        alphaFile = os.path.join(pathResults,
                                 "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level))
        sampleFile = os.path.join(pathResults,
                                  "samples_%i_%i_l%i.csv" % (iteration, n, level))

        likelihoodFile = ""
        if testSamplesUnit is not None:
            likelihoodFile = os.path.join(pathResults,
                                          "samples_%i_%i_l%i_likelihood.csv" %
                                          (iteration, n, level))

        surfaceFile = ""
        if dim == 2:
            surfaceFile = os.path.join(pathResults,
                                       "samples_%i_%i_l%i.xyz" % (iteration, n, level))
        gnuplotJpegFile = os.path.join(pathResults,
                                       "samples_%i_%i_l%i_gnuplot.jpg" %
                                       (iteration, n, level))
        sgdeJpegFile = os.path.join(pathResults,
                                    "samples_%i_%i_l%i_sgde.jpg" %
                                    (iteration, n, level))
        sgdePositiveJpegFile = os.path.join(pathResults,
                                            "samples_%i_%i_l%i_sgdePositive.jpg" %
                                            (iteration, n, level))
        configFile = os.path.join(pathResults,
                                  "sgde_%i_%i_l%i.cfg" % (iteration, n, level))
        gnuplotConfig = os.path.join(pathResults,
                                     "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level))

        # generate the grid
        grid = Grid.createLinearBoundaryGrid(dim)
        grid.createGridGenerator().regular(level)

        if grid.getSize() <= n:
            print(" l=%i" % level, end=' ')
            fd = open(gridFile, "w")
            fd.write(grid.serialize())
            fd.close()

            # write config to file
            fd = open(configFile, "w")
            fd.write(config % (trainSamplesUnitFile,
                               usingTrainTag,
                               testSamplesUnitFile,
                               likelihoodFile,
                               usingTestTag,
                               gridFile,
                               refNr, refPoints,
                               gridFile,
                               alphaFile,
                               nSamples,
                               sampleFile,
                               surfaceFile))
            fd.close()

            sgdeDist = SGDEdist.byConfig(configFile)
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            # -----------------------------------------------------------
            # do some plotting
            if dim == 2:
                # gnuplot
                sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig)
                # matplotlib
                l2error = np.nan
                kldivergence = np.nan
                crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
                if dist is not None:
                    l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                           testSamplesProb)
                    kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                     testSamplesProb)
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdeJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # copy grid and coefficients
            gridFileNew = os.path.join(pathResults,
                                       "samples_%i_%i_sgde.grid" % (iteration, n))
            alphaFileNew = os.path.join(pathResults,
                                        "samples_%i_%i_sgde.alpha.arff" % (iteration, n))
            sampleFileNew = os.path.join(pathResults,
                                         "samples_%i_%i_sgde.csv" % (iteration, n))
            copy2(gridFile, gridFileNew)
            copy2(alphaFile, alphaFileNew)
            copy2(sampleFile, sampleFileNew)
            # -----------------------------------------------------------
            # # make it positive and do all over again
            # opPositive = OperationMakePositive(sgdeDist.grid)
            # alg = EstimateDensityAlgorithm(configFile)
            # opPositive.setInterpolationAlgorithm(alg)
            # grid, alpha = opPositive.makePositive(sgdeDist.alpha)

            # scale to unit integrand
            alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha))
            sgdeDist.grid = grid
            sgdeDist.alpha = alpha

            gridFileNew = os.path.join(pathResults,
                                       "samples_%i_%i_l%i_positive.grid" %
                                       (iteration, n, level))
            alphaFileNew = os.path.join(pathResults,
                                        "samples_%i_%i_l%i_positive.alpha.arff" %
                                        (iteration, n, level))
            fd = open(gridFileNew, "w")
            fd.write(Grid.serialize(grid))
            fd.close()

            writeAlphaARFF(alphaFileNew, alpha)
            # -----------------------------------------------------------
            # collect statistics
            accGridSizes = np.append(accGridSizes, grid.getSize())
            accLevels = np.append(accLevels, level)
            l2error = np.nan
            kldivergence = np.nan
            crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
            if dist is not None:
                l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                       testSamplesProb)
                kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                 testSamplesProb)
            accL2error = np.append(accL2error, l2error)
            accCrossEntropy = np.append(accCrossEntropy, crossEntropy)
            accKLDivergence = np.append(accKLDivergence, kldivergence)

            if dim == 2:
                # -----------------------------------------------------------
                # do some plotting
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(), doQuadrature(grid, alpha),
                           kldivergence, crossEntropy, l2error))
                fig.savefig(sgdePositiveJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            if optimization == 'crossEntropy':
                measure = crossEntropy
            elif optimization == 'kldivergence':
                measure = kldivergence
            elif optimization == 'l2':
                measure = l2error
            else:
                raise AttributeError('optimization "%s" is not known for density estimation' %
                                     optimization)

            isBest = measure < bestMeasure
            if isBest:
                bestMeasure = measure

            if ans is None or isBest:
                ans = sgdeDist
                bestSetting = {'level': level,
                               'gridSize': grid.getSize(),
                               'l2error': l2error,
                               'KLDivergence': kldivergence,
                               'crossEntropy': crossEntropy}
                # -----------------------------------------------------------
                # copy grid and coefficients
                gridFileNew = os.path.join(pathResults,
                                           "samples_%i_%i.grid" % (iteration, n))
                alphaFileNew = os.path.join(pathResults,
                                            "samples_%i_%i.alpha.arff" % (iteration, n))
                sampleFileNew = os.path.join(pathResults,
                                             "samples_%i_%i.csv" % (iteration, n))
                copy2(gridFile, gridFileNew)
                copy2(alphaFile, alphaFileNew)
                copy2(sampleFile, sampleFileNew)

                gridFileNew = os.path.join(pathResults,
                                           "samples_%i_%i_positive.grid" % (iteration, n))
                alphaFileNew = os.path.join(pathResults,
                                            "samples_%i_%i_positive.alpha.arff" %
                                            (iteration, n))
                fd = open(gridFileNew, "w")
                fd.write(Grid.serialize(ans.grid))
                fd.close()

                writeAlphaARFF(alphaFileNew, ans.alpha)
            # -----------------------------------------------------------
            print(": %s = %g <= %g" % (optimization, measure, bestMeasure))
            print()
    # -----------------------------------------------------------
    # write results to file
    statsfilename = os.path.join(pathResults,
                                 "sg_sgde_%i_%i_all.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n] * len(accGridSizes),
                                                 accGridSizes,
                                                 accLevels,
                                                 accL2error,
                                                 accKLDivergence,
                                                 accCrossEntropy)).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -----------------------------------------------------------
    statsfilename = os.path.join(pathResults,
                                 "sg_sgde_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n],
                                                 bestSetting['gridSize'],
                                                 bestSetting['level'],
                                                 bestSetting['l2error'],
                                                 bestSetting['KLDivergence'],
                                                 bestSetting['crossEntropy'])).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -----------------------------------------------------------
    return ans
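# Usage sketch (hypothetical; requires a writable results directory and the
# external dmest backend that SGDEdist.byConfig drives; samples are
# illustrative only):
#
#   samples = np.random.rand(500, 2)
#   best = estimateDensitySGDE(samples,
#                              testSamplesUnit=samples,
#                              pathResults="/tmp/sgde",
#                              optimization='crossEntropy',
#                              levels=[2, 3, 4])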