def update(self, grid, v, gpi, params, *args, **kws):
    """
    Compute the variance-oriented ranking of a single grid point.

    The value returned is
        | (f(x)^2 - f(x)) * v_i * (2 * u(x) - v_i) |
    where x is the location of the grid point, f the pdf of the
    independent joint distribution of the active parameters, u(x) the
    sparse grid function evaluated at x and v_i the coefficient that
    belongs to the grid point.

    @param grid: Grid grid
    @param v: numpy array coefficients
    @param gpi: grid point to be ranked
    @param params: parameter set providing activeParams()
    @return: ranking of the grid point
    """
    storage = grid.getStorage()
    seq = storage.getSequenceNumber(gpi)

    # location of the grid point in the unit hypercube
    unit_point = DataVector(storage.getDimension())
    storage.getCoordinates(gpi, unit_point)

    # probability density at the transformed location
    active = params.activeParams()
    dist = active.getIndependentJointDistribution()
    trans = active.getJointTransformation()
    density = dist.pdf(trans.unitToProbabilistic(unit_point.array()))

    # sparse grid function value and coefficient of the point
    value = evalSGFunction(grid, v, unit_point.array())
    surplus = v[seq]

    return np.abs((density ** 2 - density) * surplus * (2 * value - surplus))
def plotGrid2d(grid, alpha=None, show_numbers=True,
               xlim=(0, 1), ylim=(0, 1), *args, **kws):
    """
    Plot the grid points of a two-dimensional grid.

    Without coefficients all points are drawn as black circles. With
    coefficients, points whose coefficient is >= 0 are drawn as blue
    upward triangles and all others as red downward triangles.

    @param grid: Grid, two-dimensional grid
    @param alpha: coefficients indexed like the grid points, or None
    @param show_numbers: bool, annotate every point with its index
    @param xlim: tuple, limits of the x axis
    @param ylim: tuple, limits of the y axis
    @param args: forwarded to plt.plot
    @param kws: forwarded to plt.plot
    """
    gs = grid.getStorage()
    p = DataVector(2)

    # Collect the rows first and stack them once; stacking inside the loop
    # copies the whole array for every grid point (quadratic cost). Each
    # bucket starts with an empty (0, 2) array so that an empty bucket
    # still yields a (0, 2) result.
    buckets = {'a': [np.empty((0, 2))],
               'p': [np.empty((0, 2))],
               'n': [np.empty((0, 2))]}
    numbers = []
    for i in range(gs.getSize()):
        gs.getCoordinates(gs.getPoint(i), p)
        if alpha is None:
            key = 'a'
        elif alpha[i] >= 0:
            key = 'p'
        else:
            key = 'n'
        # copy: p is reused for every grid point
        buckets[key].append(np.array(p.array(), dtype=float))
        numbers.append((i, p[0], p[1]))

    gps = {key: np.vstack(rows) for key, rows in buckets.items()}

    # plot the grid points
    if alpha is None:
        plt.plot(gps['a'][:, 0], gps['a'][:, 1], "o ", color='black',
                 *args, **kws)
    else:
        plt.plot(gps['p'][:, 0], gps['p'][:, 1], "^ ", color='blue',
                 *args, **kws)
        plt.plot(gps['n'][:, 0], gps['n'][:, 1], "v ", color='red',
                 *args, **kws)

    plt.xlim(xlim[0], xlim[1])
    plt.ylim(ylim[0], ylim[1])

    if show_numbers:
        for i, x, y in numbers:
            plt.text(x, y, "%i" % i, color='black', fontsize=12)
def plotGrid(grid, alpha, admissibleSet, params, refined=None):
    """
    Plot grid points, admissible set and refined points over the density.

    All points are transformed from the unit hypercube to the
    probabilistic space before plotting. Increments the module-level
    counter `myid` on every call.

    @param grid: Grid, two-dimensional grid
    @param alpha: coefficients (not used by this function)
    @param admissibleSet: iterable of grid points of the admissible set
    @param params: object providing getJointTransformation() and
                   getIndependentJointDistribution()
    @param refined: iterable of refined grid points or None
    """
    global myid

    gs = grid.getStorage()
    T = params.getJointTransformation()
    p = DataVector(2)

    def transformed(points):
        # Size the result by the number of points actually given. The
        # former fixed-size buffers plotted uninitialised rows for a
        # small admissible set and failed for refined=None.
        rows = [np.empty((0, 2))]
        for gp in points:
            gs.getCoordinates(gp, p)
            rows.append(np.array(T.unitToProbabilistic(p.array()),
                                 dtype=float))
        return np.vstack(rows)

    x = transformed(gs.getPoint(i) for i in range(gs.getSize()))
    a = transformed(admissibleSet)
    r = transformed(refined or [])

    U = params.getIndependentJointDistribution()
    plotDensity2d(U)

    # plot grid points
    plt.plot(x[:, 0], x[:, 1], linestyle=' ', marker='o', color='g',
             markersize=20)  # grid
    plt.plot(a[:, 0], a[:, 1], linestyle=' ', marker='^', color='y',
             markersize=20)  # admissible set
    plt.plot(r[:, 0], r[:, 1], linestyle=' ', marker='v', color='r',
             markersize=20)  # refined points
    plt.title("size = %i" % gs.getSize())

    myid += 1
def __computeRanking(self, v, A, b):
    """
    Compute the ranking | v * (2 * A v - v * b) | component-wise.

    Note that b is modified in place: after the call it holds v * b.

    @param v: DataVector, coefficients of known grid points
    @param A: DataMatrix, stiffness matrix
    @param b: DataVector, squared expectation value contribution
    @return: numpy array, contains the ranking for the given samples
    """
    numRows = A.getNrows()
    numCols = A.getNcols()

    # matrix-vector product A v
    product = DataVector(numRows)
    product.setAll(0.0)
    for row in xrange(numRows):
        for col in xrange(numCols):
            product[row] += A.get(row, col) * v[col]

    product.mult(2.)            # 2 * A v
    b.componentwise_mult(v)     # b <- v * b (in place)
    product.sub(b)              # 2 * A v - v * b

    ranking = DataVector(v)
    ranking.componentwise_mult(product)   # v * (2 * A v - v * b)
    ranking.abs()
    return ranking.array()
def gradient_fun(self, params):
    '''
    Compute the per-sample gradient vectors in the current state.

    For every sample (x, y) of the last seen batch, the transposed
    evaluation operator applied to the scalar 1 yields a vector g. The
    sample's gradient is g scaled by the residual (g . params - y).

    @param params: current parameter vector
    @return: numpy array of shape (batch_size, grid size), one gradient
             per row
    '''
    # These do not depend on the sample, so build them once instead of
    # once per loop iteration.
    params_DV = DataVector(params)
    single_alpha = DataVector(1)
    single_alpha[0] = 1

    gradient_array = np.empty((self.batch_size, self.grid.getSize()))
    for sample_idx in xrange(self.batch_size):
        x = self._lastseen[sample_idx, :self.dim]
        y = self._lastseen[sample_idx, self.dim]

        gradient = DataVector(len(params_DV))
        data_matrix = DataMatrix(x.reshape(1, -1))
        mult_eval = createOperationMultipleEval(self.grid, data_matrix)
        mult_eval.multTranspose(single_alpha, gradient)

        residual = gradient.dotProduct(params_DV) - y
        gradient.mult(residual)
        gradient_array[sample_idx, :] = gradient.array()
    return gradient_array
def plotGrid2d(grid, alpha=None):
    """
    Plot the grid points of a two-dimensional grid on the unit square.

    Points with a coefficient >= 0 (or all points if alpha is None) are
    drawn as upward triangles, the remaining ones as downward triangles.

    @param grid: Grid, two-dimensional grid
    @param alpha: coefficients indexed like the grid points, or None
    """
    storage = grid.getStorage()
    coords = DataVector(2)

    # every bucket starts with an empty (0, 2) array
    nonNegative = [np.zeros([0, 2])]
    negative = [np.zeros([0, 2])]
    for idx in xrange(storage.size()):
        storage.get(idx).getCoords(coords)
        if alpha is None or alpha[idx] >= 0:
            bucket = nonNegative
        else:
            bucket = negative
        bucket.append(np.array(coords.array()))

    upper = np.vstack(nonNegative)
    lower = np.vstack(negative)

    # plot the grid points
    plt.plot(upper[:, 0], upper[:, 1], "^ ", color='red')
    plt.plot(lower[:, 0], lower[:, 1], "v ", color='red')
    plt.xlim(0, 1)
    plt.ylim(0, 1)
def plotSG3d(grid, alpha, n=50, f=lambda x: x):
    """
    Plot a two-dimensional sparse grid function as a 3d surface.

    The function is evaluated on an n x n equidistant mesh of the unit
    square; the grid points are scattered at height zero.

    @param grid: Grid, two-dimensional grid
    @param alpha: coefficients of the sparse grid function
    @param n: int, number of mesh points per direction
    @param f: function applied to every evaluated function value
    @return: tuple (figure, axes, Z) where Z holds the mesh values
    """
    fig = plt.figure()
    # Figure.gca() no longer accepts keyword arguments in current
    # matplotlib; add_subplot creates the same 3d axes on a new figure.
    ax = fig.add_subplot(111, projection='3d')

    X = np.linspace(0, 1, n)
    Y = np.linspace(0, 1, n)
    X, Y = np.meshgrid(X, Y)
    Z = np.zeros((n, n))
    for i in xrange(len(X)):
        for j, (x, y) in enumerate(zip(X[i], Y[i])):
            Z[i, j] = f(evalSGFunction(grid, alpha, DataVector([x, y])))

    # get grid points
    gs = grid.getStorage()
    gps = np.zeros([gs.size(), 2])
    p = DataVector(2)
    for i in xrange(gs.size()):
        gs.get(i).getCoords(p)
        gps[i, :] = p.array()

    surf = ax.plot_surface(X, Y, Z, rstride=1, cstride=1,
                           cmap=cm.coolwarm, linewidth=0,
                           antialiased=False)
    ax.scatter(gps[:, 0], gps[:, 1], np.zeros(gs.size()))
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)

    fig.colorbar(surf, shrink=0.5, aspect=5)
    return fig, ax, Z
def update(self, grid, v, gpi, params, *args, **kws):
    """
    Rank a grid point by |v_i * f(x_i)|, i.e. its coefficient weighted
    with the probability density at the location of the grid point.

    @param grid: Grid grid
    @param v: numpy array coefficients
    @param gpi: grid point to be ranked
    @param params: parameter set providing activeParams()
    @return: ranking of the grid point
    """
    storage = grid.getStorage()

    # joint distribution and transformation of the active parameters
    active = params.activeParams()
    dist = active.getIndependentJointDistribution()
    trans = active.getJointTransformation()

    # coefficient index and location of the grid point
    seq = storage.getSequenceNumber(gpi)
    unit_point = DataVector(storage.getDimension())
    storage.getCoordinates(gpi, unit_point)

    density = dist.pdf(trans.unitToProbabilistic(unit_point.array()))
    return np.abs(v[seq] * density)
def checkPositivity(grid, alpha):
    """
    Check the sparse grid function for negative values on the full grid
    with the maximum level of the given grid.

    Prints a warning with the number and range of negative values if
    any value is below -1e-11.

    @param grid: Grid
    @param alpha: coefficients of the sparse grid function
    @return: dict mapping the full grid index to a tuple
             (function value, HashGridPoint) for every negative value
    """
    gs = grid.getStorage()

    # full grid of the same maximum level
    fullGrid = Grid.createLinearGrid(gs.getDimension())
    fullGrid.getGenerator().full(gs.getMaxLevel())
    fullStorage = fullGrid.getStorage()

    # coordinates of all full grid points
    numPoints = fullStorage.getSize()
    A = np.ndarray((numPoints, fullStorage.getDimension()))
    p = DataVector(gs.getDimension())
    for i in range(numPoints):
        fullStorage.getCoordinates(fullStorage.getPoint(i), p)
        A[i, :] = p.array()

    res = evalSGFunctionMulti(grid, alpha, A)

    negativeGridPoints = {}
    ymin, ymax = 0, -1e10
    for i, yi in enumerate(res):
        if not (yi < -1e-11):
            continue
        negativeGridPoints[i] = yi, HashGridPoint(fullStorage.getPoint(i))
        ymin = min(ymin, yi)
        ymax = max(ymax, yi)

    cnt = len(negativeGridPoints)
    if cnt > 0:
        print("warning: function is not positive")
        print("%i/%i: [%g, %g]" % (cnt, fullStorage.getSize(), ymin, ymax))

    return negativeGridPoints
def evalSGFunctionMulti(grid, alpha, samples, isConsistent=True):
    """
    Evaluate a sparse grid function at many points at once.

    @param grid: Grid
    @param alpha: coefficients of the sparse grid function
    @param samples: 2d numpy array, one evaluation point per row
    @param isConsistent: bool, if False the naive evaluation operation
                         is used regardless of the grid type
    @return: numpy array with one function value per sample
    @raise AttributeError: if samples is one-dimensional or its number
                           of columns differs from the grid dimension
    """
    if len(samples.shape) == 1:
        raise AttributeError(
            'the samples to be evaluated have to be a 2d numpy array')

    numDims = grid.getStorage().getDimension()
    if samples.shape[1] != numDims:
        raise AttributeError(
            'the dimensionality of the samples differ from the dimensionality of the grid (%i != %i)'
            % (samples.shape[1], numDims))

    samples_matrix = DataMatrix(samples)

    # select the evaluation operation
    if not isConsistent or grid.getType() in multipleEvalNaiveGridTypes:
        opEval = createOperationMultipleEvalNaive(grid, samples_matrix)
    elif grid.getType() == GridType_Linear:
        # use streaming approach for multiple eval
        evalConfig = OperationMultipleEvalConfiguration(
            OperationMultipleEvalType_STREAMING,
            OperationMultipleEvalSubType_DEFAULT)
        opEval = createOperationMultipleEval(grid, samples_matrix,
                                             evalConfig)
    else:
        # use standard approach
        opEval = createOperationMultipleEval(grid, samples_matrix)

    result = DataVector(samples.shape[0])
    opEval.mult(DataVector(alpha), result)
    return result.array()
def serializeToFile(self, memento, filename):
    """
    Write scatter plots of all pairwise 2d projections of the grid
    points to a (gz) file.

    One subplot is created for every pair of dimensions (x1, x2) with
    x1 < x2.

    @param memento: Grid whose points are plotted
    @param filename: name of the output file, opened via self.gzOpen
    """
    fstream = self.gzOpen(filename, "w")
    try:
        figure = plt.figure()
        try:
            storage = memento.getStorage()
            dim = storage.dim()

            # collect the coordinates of all grid points
            coord_vector = DataVector(dim)
            points = zeros([storage.size(), dim])
            for i in xrange(storage.size()):
                storage.get(i).getCoords(coord_vector)
                points[i] = coord_vector.array()

            # One cell per unordered pair of dimensions. The number of
            # columns is derived from the number of rows so that
            # rows * cols can never be smaller than the number of plots.
            num_of_subplots = dim * (dim - 1) // 2
            rows = int(ceil(sqrt(num_of_subplots)))
            cols = (num_of_subplots + rows - 1) // rows if rows > 0 else 0

            position = 1
            for x1 in xrange(1, dim):
                # start at x1 + 1: visit every pair exactly once
                for x2 in xrange(x1 + 1, dim + 1):
                    ax = figure.add_subplot(rows, cols, position)
                    ax.set_xlabel('x%d' % x1)
                    ax.set_ylabel('x%d' % x2)
                    ax.scatter(points[:, x1 - 1], points[:, x2 - 1])
                    position += 1

            figure.savefig(fstream)
        finally:
            # release the figure even if plotting or saving failed
            plt.close(figure)
    finally:
        fstream.close()
def addConst(grid, alpha, c, y):
    """
    Apply the affine map u -> c * u + y to the nodal values of a sparse
    grid function and return the resulting hierarchical coefficients.

    @param grid: Grid
    @param alpha: hierarchical coefficients
    @param c: scaling factor applied to every nodal value
    @param y: constant added to every nodal value
    @return: numpy array, hierarchical coefficients of the new function
    """
    values = DataVector(alpha)
    hierarchisation = createOperationHierarchisation(grid)

    # hierarchical -> nodal
    hierarchisation.doDehierarchisation(values)
    for k in range(values.getSize()):
        values[k] = c * values[k] + y
    # nodal -> hierarchical
    hierarchisation.doHierarchisation(values)

    return values.array()
def hierarchizeFun(fun):
    """
    Evaluate fun at all grid points and hierarchize the nodal values.

    Uses the names `grid` and `gs` from the enclosing scope.

    @param fun: function taking a numpy array of coordinates
    @return: result of hierarchize(grid, nodal values)
    """
    values = np.ndarray(grid.getSize())
    coords = DataVector(gs.getDimension())
    for k in range(gs.getSize()):
        gs.getPoint(k).getStandardCoordinates(coords)
        values[k] = fun(coords.array())
    return hierarchize(grid, values)
def makeAddedNodalValuesPositive(self, grid, alpha, addedGridPoints,
                                 tol=-1e-14):
    """
    Lift negative function values at the added grid points to zero by
    correcting the coefficient of the respective grid point.

    alpha is modified in place and returned.

    @param grid: Grid
    @param alpha: coefficients, indexable by sequence number
    @param addedGridPoints: iterable of grid points to be checked
    @param tol: values below this threshold count as negative
    @return: the (modified) alpha
    """
    storage = grid.getStorage()
    coords = DataVector(storage.getDimension())
    for point in addedGridPoints:
        point.getStandardCoordinates(coords)
        value = evalSGFunction(grid, alpha, coords.array())
        if not (value < tol):
            continue

        # subtract the negative value from the point's own coefficient
        seq = storage.getSequenceNumber(point)
        alpha[seq] -= value
        assert alpha[seq] > -1e-14
        assert evalSGFunction(grid, alpha, coords.array()) < 1e-14

    return alpha
def __init__(self, grid, alpha, trainData=None, bounds=None, config=None, learner=None, unitIntegrand=True, isPositive=True):
    """
    Set up a sparse grid density from a grid and its coefficients.

    The grid is cloned and alpha is copied, so the caller's objects are
    not shared with the instance.

    @param grid: Grid
    @param alpha: numpy array, coefficients of the sparse grid function
    @param trainData: numpy array of training data or None
    @param bounds: passed on to the base class constructor
    @param config: stored as self.config
    @param learner: learner to be stored; if None and trainData is given,
                    a SparseGridDensityEstimator is created
    @param unitIntegrand: bool, scale the coefficients by the volume
    @param isPositive: bool, if True the volume is computed by sparse
                       grid quadrature, otherwise it is estimated from
                       20000 Latin hypercube samples using only the
                       non-negative function values
    @raise AttributeError: if the dimensionality of the training data
                           differs from the one of the grid
    """
    super(SGDEdist, self).__init__(grid.getStorage().getDimension(), trainData, bounds)
    self.grid = grid.clone()
    self.alpha = alpha.copy()
    self.alpha_vec = DataVector(alpha)
    if trainData is not None:
        self.trainData = trainData.copy()
    else:
        self.trainData = None
    self.config = config
    self.unitIntegrand = unitIntegrand
    if learner is None and trainData is not None:
        trainData_vec = DataMatrix(trainData)
        self.learner = SparseGridDensityEstimator(self.grid, self.alpha_vec, trainData_vec)
    else:
        self.learner = learner
    # NOTE(review): self.dim is read here without being assigned in this
    # method when trainData is given -- presumably set by the base class
    # constructor; confirm.
    if trainData is None:
        self.dim = grid.getStorage().getDimension()
    elif self.dim != grid.getStorage().getDimension():
        raise AttributeError("the dimensionality of the data differs from the one of the grid")
    assert self.grid.getSize() == len(self.alpha)
    if isPositive:
        self.vol = createOperationQuadrature(self.grid).doQuadrature(self.alpha_vec)
    else:
        # do monte carlo quadrature to estimate the volume
        n = 20000
        numDims = grid.getStorage().getDimension()
        generator = LatinHypercubeSampleGenerator(numDims, n)
        samples = np.ndarray((n, numDims))
        sample = DataVector(numDims)
        for i in range(samples.shape[0]):
            generator.getSample(sample)
            samples[i, :] = sample.array()
        values = evalSGFunction(grid, alpha, samples)
        # negative function values are clipped to zero before averaging
        self.vol = np.mean([max(0.0, value) for value in values])
    # scale the coefficients such that it has unit integrand
    # (computed with the volume before it is multiplied by the volume of
    # the transformation below)
    self.unnormalized_alpha = np.array(self.alpha / self.vol)
    self.unnormalized_alpha_vec = DataVector(self.unnormalized_alpha)
    # NOTE(review): self.trans is not assigned in this method --
    # presumably provided by the base class; confirm.
    self.vol *= self.trans.vol()
    # the coefficients are only rescaled for a volume above 1e-13
    if unitIntegrand and self.vol > 1e-13:
        self.alpha /= self.vol
        self.alpha_vec.mult(1. / self.vol)
def dehierarchize(grid, alpha):
    """
    Compute the nodal values of a sparse grid function, i.e. evaluate it
    at all of its own grid points.

    @param grid: Grid
    @param alpha: hierarchical coefficients
    @return: numpy array, function values at the grid points, ordered
             like the grid storage
    """
    gs = grid.getStorage()
    numPoints = gs.getSize()

    # collect the coordinates of all grid points
    p = DataVector(gs.getDimension())
    A = np.ndarray((numPoints, gs.getDimension()))
    for i in range(numPoints):
        gs.getCoordinates(gs.getPoint(i), p)
        A[i, :] = p.array()

    return evalSGFunctionMulti(grid, alpha, A)
def dehierarchizeOnNewGrid(gridResult, grid, alpha):
    """
    Evaluate the sparse grid function (grid, alpha) at the grid points
    of another grid.

    @param gridResult: Grid whose points are the evaluation points
    @param grid: Grid of the function to be evaluated
    @param alpha: hierarchical coefficients belonging to grid
    @return: numpy array, function values ordered like the storage of
             gridResult
    """
    targetStorage = gridResult.getStorage()
    numPoints = targetStorage.getSize()

    coords = np.ndarray((numPoints, targetStorage.getDimension()))
    point = DataVector(targetStorage.getDimension())
    for row in range(numPoints):
        targetStorage.getCoordinates(targetStorage.getPoint(row), point)
        coords[row, :] = point.array()

    return evalSGFunctionMulti(grid, alpha, coords)
def plotSGNodal1d(grid, alpha):
    """
    Plot the nodal values of a sparse grid function over the first
    coordinate of its grid points.

    @param grid: Grid
    @param alpha: hierarchical coefficients
    @return: tuple (result of plotNodal1d, numpy array with the first
             coordinate in column 0 and the function value in column 1)
    """
    storage = grid.getStorage()
    numPoints = storage.getSize()

    nodal = np.ndarray([numPoints, 2])
    # NOTE(review): the coordinate vector has length 2 although the
    # function name suggests a 1d grid -- confirm the intended dimension.
    coords = DataVector(2)
    for k in range(numPoints):
        storage.getCoordinates(storage.getPoint(k), coords)
        nodal[k, 0] = coords[0]
        nodal[k, 1] = evalSGFunction(grid, alpha, coords.array())

    return plotNodal1d(nodal), nodal
def pdf(self, x):
    """
    Evaluate the probability density at one or several points.

    @param x: evaluation point(s), converted by self._convertEvalPoint
    @return: a single value if exactly one density was computed,
             otherwise a numpy array of densities
    """
    points = self._convertEvalPoint(x)
    points_matrix = DataMatrix(points)
    densities_vec = DataVector(points.shape[0])
    self.dist.pdf(points_matrix, densities_vec)

    densities = densities_vec.array()
    return densities[0] if len(densities) == 1 else densities
def nextSamples(self, n=1):
    """
    Draw n samples from the generator and map them to the probabilistic
    space with the inverse cdf of the independent joint distribution.

    @param n: int, number of samples
    @return: Samples object containing the drawn samples
    """
    unit_sample = DataVector(self._dim)
    samples = Samples(self._params, dtype=DistributionType.UNITUNIFORM)
    dist = self._params.activeParams().getIndependentJointDistribution()

    for _ in xrange(n):
        self.__genObj.getSample(unit_sample)
        # transform to the probabilistic space and store the sample
        samples.add(dist.ppf(unit_sample.array()),
                    dtype=SampleType.ACTIVEPROBABILISTIC)

    return samples
def computeHierarchicalCoefficients(self, grid, alpha, newGridPoints):
    """
    Evaluate self.func at every grid point and hierarchize the values.

    alpha and newGridPoints are accepted but not used here.

    @param grid: Grid
    @param alpha: coefficients (unused)
    @param newGridPoints: new grid points (unused)
    @return: result of hierarchize(grid, nodal values)
    """
    storage = grid.getStorage()
    numPoints = storage.getSize()

    values = np.ndarray(numPoints)
    coords = DataVector(storage.getDimension())
    for k in range(numPoints):
        storage.getPoint(k).getStandardCoordinates(coords)
        values[k] = self.func(coords.array())

    return hierarchize(grid, values)
def getCollocationNodes(self):
    """
    Collect the coordinates of all grid points.

    @return: numpy array (float32), one grid point per row
    """
    storage = self.grid.getStorage()
    numPoints = storage.size()
    numDims = storage.dim()

    nodes = np.ndarray([numPoints, numDims], dtype='float32')
    coords = DataVector(numDims)
    for row in xrange(numPoints):
        storage.get(row).getCoords(coords)
        nodes[row, :] = coords.array()
    return nodes
def getCollocationNodes(self):
    """
    Collect the coordinates of all grid points.

    @return: numpy array (float), one grid point per row
    """
    storage = self.grid.getStorage()
    numPoints = storage.getSize()
    numDims = storage.getDimension()

    nodes = np.ndarray([numPoints, numDims], dtype='float')
    coords = DataVector(numDims)
    for row in range(numPoints):
        storage.getCoordinates(storage.getPoint(row), coords)
        nodes[row, :] = coords.array()
    return nodes
def evalSGFunctionBasedOnParents(grid, alpha, gpi):
    """
    Evaluate the sparse grid function at a grid point using only the
    grid points that are hierarchical ancestors of it.

    For all other grid points it is asserted that their basis function
    is smaller than 1e-14 at that location.

    @param grid: Grid
    @param alpha: hierarchical coefficients
    @param gpi: grid point at which the function is evaluated
    @return: function value at the location of gpi
    """
    gs = grid.getStorage()
    basis = getBasis(grid)

    location = DataVector(gs.getDimension())
    gs.getCoordinates(gpi, location)
    x = location.array()

    def tensorBasis(gp, point):
        # product of the one-dimensional basis functions of gp
        product = 1.0
        for idim in range(point.shape[0]):
            product *= basis.eval(gp.getLevel(idim), gp.getIndex(idim),
                                  point[idim])
        return product

    ux = 0.0
    for j in range(gs.getSize()):
        candidate = gs.getPoint(j)
        if candidate.isHierarchicalAncestor(gpi):
            ux += alpha[j] * tensorBasis(candidate, x)
        else:
            assert tensorBasis(candidate, x) < 1e-14

    return ux
def test_2DNormalDist_variance(self):
    """
    Interpolate a 2d joint normal density on a polynomial sparse grid,
    plot both densities and print mean and variance of both.
    """
    # prepare data
    U = dists.J([dists.Normal(2.0, .5, -1, 4),
                 dists.Normal(1.0, .5, -1, 3)])

    # define linear transformation
    trans = JointTransformation()
    for lower, upper in U.getBounds():
        trans.add(LinearTransformation(lower, upper))

    # get a sparse grid approximation
    grid = Grid.createPolyGrid(U.getDim(), 10)
    grid.getGenerator().regular(5)
    gs = grid.getStorage()
    numPoints = gs.getSize()

    # evaluate the density at all grid points
    p = DataVector(gs.getDimension())
    nodalValues = np.ndarray(numPoints)
    for i in range(numPoints):
        gs.getPoint(i).getStandardCoordinates(p)
        nodalValues[i] = U.pdf(trans.unitToProbabilistic(p.array()))

    # hierarchize
    alpha = hierarchize(grid, nodalValues)

    dist = SGDEdist(grid, alpha, bounds=U.getBounds())

    # analytic density
    fig = plt.figure()
    plotDensity2d(U)
    fig.show()

    # sparse grid density
    fig = plt.figure()
    plotSG2d(dist.grid, dist.alpha,
             addContour=True,
             show_negative=True,
             show_grid_points=True)
    fig.show()

    print("2d: mean = %g ~ %g" % (U.mean(), dist.mean()))
    print("2d: var = %g ~ %g" % (U.var(), dist.var()))
    plt.show()
def sampleGrids(self, filename):
    """
    For every time step of interest write three ARFF files: the sparse
    grid function sampled on a full grid of level 4, the sparse grid
    points and the coefficients.

    @param filename: prefix of the output file names
    """
    timeSteps = self.__uqManager.getTimeStepsOfInterest()

    names = self.__params.getNames()
    names.append('f_\\mathcal{I}(x)')

    for t in timeSteps:
        grid, surplus = self.__knowledge.getSparseGridFunction(self._qoi, t)

        # init
        gs = grid.getStorage()
        dim = gs.getDimension()

        # -----------------------------------------
        # do full grid sampling of sparse grid function
        # -----------------------------------------
        fullGridPoints = eval_fullGrid(4, dim)
        res = evalSGFunctionMulti(grid, surplus, fullGridPoints)
        # append the function values as last column
        sampled = np.vstack((fullGridPoints.T, res)).T

        writeDataARFF({'filename': "%s.t%f.samples.arff" % (filename, t),
                       'data': DataMatrix(sampled),
                       'names': names})

        # -----------------------------------------
        # write sparse grid points to file
        # -----------------------------------------
        gridPoints = np.ndarray((gs.getSize(), dim))
        x = DataVector(dim)
        for i in range(gs.getSize()):
            gs.getCoordinates(gs.getPoint(i), x)
            gridPoints[i, :] = x.array()

        writeDataARFF({'filename': "%s.t%f.gridpoints.arff" % (filename, t),
                       'data': DataMatrix(gridPoints),
                       'names': names})

        # -----------------------------------------
        # write alpha
        # -----------------------------------------
        writeAlphaARFF("%s.t%f.alpha.arff" % (filename, t), surplus)
def update(self, grid, v, gpi, params, *args, **kws):
    """
    Rank a grid point by the absolute value of its coefficient scaled
    with the probability density at the location of the grid point.

    @param grid: Grid grid
    @param v: numpy array coefficients
    @param gpi: grid point to be ranked
    @param params: parameter set providing activeParams()
    @return: ranking of the grid point
    """
    storage = grid.getStorage()

    # location of the grid point in the unit hypercube
    unit_point = DataVector(storage.getDimension())
    storage.getCoordinates(gpi, unit_point)

    # joint distribution and transformation of the active parameters
    active = params.activeParams()
    dist = active.getIndependentJointDistribution()
    trans = active.getJointTransformation()
    location = trans.unitToProbabilistic(unit_point.array())

    # scale surplus by probability density
    seq = storage.getSequenceNumber(gpi)
    return np.abs(v[seq]) * dist.pdf(location)
def eval_fullGrid(level, dim, border=True):
    """
    Return the coordinates of all points of a full linear grid.

    @param level: int, level of the full grid
    @param dim: int, number of dimensions
    @param border: bool, use a linear boundary grid instead of a linear
                   grid
    @return: numpy array, one grid point per row
    """
    if not border:
        fullGrid = Grid.createLinearGrid(dim)
    else:
        fullGrid = Grid.createLinearBoundaryGrid(dim, 1)
    fullGrid.getGenerator().full(level)

    storage = fullGrid.getStorage()
    numPoints = storage.getSize()
    points = np.ndarray((numPoints, dim))
    coords = DataVector(dim)
    for row in range(numPoints):
        storage.getCoordinates(storage.getPoint(row), coords)
        points[row, :] = coords.array()

    return points
def hierarchizeEvalHierToTop(grid, nodalValues):
    """
    Hierarchize nodal values by building up a copy of the grid level sum
    by level sum.

    In every step the points of the next level sum are inserted into the
    new grid; the coefficient of each inserted point is its nodal value
    minus the value of the function built so far at that point.

    @param grid: Grid
    @param nodalValues: nodal values, indexed like the storage of grid
    @return: numpy array, coefficients indexed like the storage of grid
    """
    gs = grid.getStorage()
    numDims = gs.getDimension()
    # load a new empty grid which we fill step by step
    newGrid = grid.createGridOfEquivalentType()
    newGs = newGrid.getStorage()
    alpha = np.ndarray(1)
    # add root node to the new grid
    # NOTE(review): assumes the point with index 0 is the root node --
    # confirm.
    newGs.insert(gs.getPoint(0))
    alpha[0] = nodalValues[0]
    # sort points by levelsum
    ixs = {}
    for i in range(gs.getSize()):
        levelsum = gs.getPoint(i).getLevelSum()
        # skip root node
        if levelsum > numDims:
            if levelsum in ixs:
                ixs[levelsum].append(i)
            else:
                ixs[levelsum] = [i]
    # run over the grid points by level sum
    x = DataVector(numDims)
    for levelsum in np.sort(list(ixs.keys())):
        # add the grid points of the current level to the new grid
        # (pairs of index in the new storage and nodal value)
        newixs = [None] * len(ixs[levelsum])
        for i, ix in enumerate(ixs[levelsum]):
            newixs[i] = (newGs.insert(gs.getPoint(ix)), nodalValues[ix])
        # update the alpha values
        alpha = np.append(alpha, np.zeros(newGs.getSize() - len(alpha)))
        # evaluate with a snapshot taken before this level is updated, so
        # coefficients set in this pass do not influence each other
        newAlpha = np.copy(alpha)
        for ix, nodalValue in newixs:
            gs.getCoordinates(newGs.getPoint(ix), x)
            alpha[ix] = nodalValue - evalSGFunction(newGrid, newAlpha, x.array())
    del x
    # store alphas according to indices of grid
    ans = np.ndarray(gs.getSize())
    for i in range(gs.getSize()):
        j = newGs.getSequenceNumber(gs.getPoint(i))
        ans[i] = alpha[j]
    return ans
def test_1DNormalDist_variance(self):
    """
    Interpolate a 1d normal density on a polynomial sparse grid, plot
    both densities and print mean and variance of both.
    """
    # prepare data
    U = dists.Normal(1, 2, -8, 8)

    # define linear transformation
    trans = JointTransformation()
    a, b = U.getBounds()
    trans.add(LinearTransformation(a, b))

    # get a sparse grid approximation
    grid = Grid.createPolyGrid(U.getDim(), 10)
    grid.getGenerator().regular(5)
    gs = grid.getStorage()

    # set function values in alpha
    p = DataVector(gs.getDimension())
    nodalValues = np.ndarray(gs.getSize())
    for i in range(gs.getSize()):
        gs.getPoint(i).getStandardCoordinates(p)
        nodalValues[i] = U.pdf(trans.unitToProbabilistic(p.array()))

    # hierarchize
    alpha = hierarchize(grid, nodalValues)

    dist = SGDEdist(grid, alpha, bounds=U.getBounds())

    # Raw strings: in a normal string "\a" is the BEL control character,
    # so "$\alpha=0.1$" never reached mathtext as \alpha. The mean label
    # also needs its closing "$" to be rendered as math.
    mean_label = r"$\mathbb{E}$"
    interval_label = r"$\alpha=0.1$"

    fig = plt.figure()
    plotDensity1d(U, alpha_value=0.1,
                  mean_label=mean_label,
                  interval_label=interval_label)
    fig.show()

    fig = plt.figure()
    plotDensity1d(dist, alpha_value=0.1,
                  mean_label=mean_label,
                  interval_label=interval_label)
    fig.show()

    print("1d: mean = %g ~ %g" % (U.mean(), dist.mean()))
    print("1d: var = %g ~ %g" % (U.var(), dist.var()))
    plt.show()
def plotGrid3d(grid, grid_points_at=0, ax=None):
    """
    Plot the grid points of a two-dimensional grid into 3d axes at a
    fixed height.

    @param grid: Grid, two-dimensional grid
    @param grid_points_at: z value at which the points are drawn
    @param ax: 3d axes to draw into; if None a new figure with 3d axes
               is created
    """
    if ax is None:
        fig = plt.figure()
        # Figure.gca() no longer accepts keyword arguments in current
        # matplotlib; add_subplot creates the same 3d axes on a new
        # figure.
        ax = fig.add_subplot(111, projection='3d')

    # get grid points
    gs = grid.getStorage()
    gps = np.zeros([gs.getSize(), 2])
    p = DataVector(2)
    for i in range(gs.getSize()):
        gs.getCoordinates(gs.getPoint(i), p)
        gps[i, :] = p.array()

    ax.plot(gps[:, 0], gps[:, 1],
            np.ones(gps.shape[0]) * grid_points_at,
            " ", c="red", marker="o", ms=15)
def refineGrid(self):
    """
    Refine the grid once and return the new collocation nodes.

    Increments self.iteration and, if self._verbose is set, prints the
    grid and admissible set sizes before and after the refinement.

    @return: numpy array (float32), coordinates of the new collocation
             nodes, one node per row
    """
    # load the time steps we use for refinement
    refinets = self.getTimeStepsOfInterest()
    oldGridSize = self.getGrid().getSize()
    oldAdmissibleSetSize = self.getRefinement().getAdmissibleSet().getSize()

    # refine
    newCollocationNodes = self.getRefinement().refineGrid(self, refinets)

    # increase counter
    self.iteration += 1

    # print some information
    # (single-argument print calls are valid on Python 2 and Python 3 and
    # produce the same output as the former print statements)
    if self._verbose:
        print("iteration: %i" % self.iteration)
        print("old grid size: %i" % oldGridSize)
        print("old AS size: %i" % oldAdmissibleSetSize)
        print("new collocation nodes: %i" % len(newCollocationNodes))
        print("new grid size: %s" % self.getGrid().getSize())
        print("new AS size: %i"
              % self.getRefinement().getAdmissibleSet().getSize())

    # parse them to a numpy array
    gs = self.grid.getStorage()
    p = DataVector(gs.dim())
    ans = np.ndarray([len(newCollocationNodes), gs.dim()], dtype='float32')
    for i, gp in enumerate(newCollocationNodes):
        gp.getCoords(p)
        ans[i, :] = p.array()

    return ans
def pdf(self, x):
    """
    Evaluate the probability density.

    @param x: a number, a list, a DataVector (one point) or a DataMatrix
              (one point per row)
    @return: a single value if exactly one density was computed,
             otherwise a numpy array of densities
    @raise TypeError: if x has none of the supported types
    """
    # convert the parameter to the right format
    if isList(x):
        x = DataVector(x)
    elif isNumerical(x):
        x = DataVector([x])

    if isinstance(x, DataMatrix):
        A = x
        res = DataVector(A.getNrows())
        res.setAll(0.0)
    elif isinstance(x, DataVector):
        A = DataMatrix(1, len(x))
        A.setRow(0, x)
        res = DataVector(1)
        res.setAll(0)
    else:
        # without this branch the call below failed with an
        # UnboundLocalError that hid the actual problem
        raise TypeError("pdf: unsupported type '%s' for x"
                        % type(x).__name__)

    self.dist.pdf(A, res)

    if len(res) == 1:
        return res[0]
    return res.array()
def discretizeFunction(f, bounds, level=2, hasBorder=False, *args, **kws):
    """
    Interpolate a function on a regular linear sparse grid.

    @param f: function taking a point in the original (bounded) space
    @param bounds: sequence of (lower, upper) limits, one per dimension
    @param level: int, level of the regular sparse grid
    @param hasBorder: bool, use a linear boundary grid
    @param args: accepted but not used
    @param kws: accepted but not used
    @return: tuple (grid, alpha, err) with the grid, the hierarchical
             coefficients and the estimated discrete l2 error
    """
    # define linear transformation to the unit hyper cube
    T = JointTransformation()
    for limits in bounds:
        T.add(LinearTransformation(limits[0], limits[1]))

    # create adequate grid
    dim = len(bounds)
    if not hasBorder:
        grid = Grid.createLinearGrid(dim)
    else:
        grid = Grid.createLinearBoundaryGrid(dim)

    # init storage
    grid.createGridGenerator().regular(level)
    gs = grid.getStorage()
    numPoints = gs.size()

    # discretize on given level
    unit_point = DataVector(dim)
    nodalValues = DataVector(numPoints)
    for i in xrange(numPoints):
        gs.get(i).getCoords(unit_point)
        # transform to the right space and apply the given function
        nodalValues[i] = float(f(T.unitToProbabilistic(unit_point.array())))

    # hierarchize
    alpha = hierarchize(grid, nodalValues)

    # estimate the l2 error
    err = estimateDiscreteL2Error(grid, alpha, f)

    return grid, alpha, err
def computeErrors(jgrid, jalpha, grid, alpha, f, n=200):
    """
    Estimate the quality of the interpolation of f on jgrid with
    uniformly distributed random control samples.

    @param jgrid: Grid, new discretization
    @param jalpha: DataVector, new surpluses
    @param grid: Grid, old discretization
    @param alpha: DataVector, old surpluses
    @param f: function, to be interpolated; called with a sample and the
              value of the old sparse grid function at that sample
    @param n: int, number of Monte Carlo estimates for error estimation
    @return: tuple(<float>, <float>), maxdrift, l2norm
    """
    numDims = jgrid.getStorage().dim()

    # create control samples
    samples = DataMatrix(np.random.rand(n, numDims))

    # evaluate the sparse grid functions
    jnodalValues = evalSGFunctionMulti(jgrid, jalpha, samples)
    nodalValues = evalSGFunctionMulti(grid, alpha, samples)

    # absolute error per sample
    sample = DataVector(numDims)
    err = DataVector(n)
    for i in xrange(n):
        samples.getRow(i, sample)
        expected = f(sample.array(), nodalValues[i])
        err[i] = abs(expected - jnodalValues[i])

    # error statistics
    l2norm = err.l2Norm()
    err.abs()
    maxdrift = err.max()

    return maxdrift, l2norm
def ppf(self, x):
    """
    Apply the inverse Rosenblatt transformation.

    @param x: a number, a list, a DataVector or a DataMatrix
    @return: for a one-dimensional grid a single value (one input) or a
             numpy array; otherwise entry (0, 0) of the result for
             vector input and the whole result array for matrix input
    @raise TypeError: if the grid has more than one dimension and x has
                      none of the supported types
    """
    # convert the parameter to the right format
    if isList(x):
        x = DataVector(x)
    elif isNumerical(x):
        x = DataVector([x])

    # do the transformation
    if self.grid.getStorage().dim() == 1:
        op = createOperationInverseRosenblattTransformation1D(self.grid)
        ans = np.ndarray(len(x))
        for i, xi in enumerate(x.array()):
            ans[i] = op.doTransformation1D(self.alpha, xi)
        if len(ans) == 1:
            return ans[0]
        else:
            return ans
    else:
        if isinstance(x, DataMatrix):
            A = x
            B = DataMatrix(A.getNrows(), A.getNcols())
            B.setAll(0.0)
        elif isinstance(x, DataVector):
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
            B = DataMatrix(1, len(x))
            B.setAll(0)
        else:
            # without this branch the transformation below failed with
            # an UnboundLocalError that hid the actual problem
            raise TypeError("ppf: unsupported type '%s' for x"
                            % type(x).__name__)

        # do the transformation
        op = createOperationInverseRosenblattTransformation(self.grid)
        op.doTransformation(self.alpha, A, B)

        # extract the outcome
        # NOTE(review): for vector input only entry (0, 0) is returned
        # although the grid has more than one dimension -- confirm that
        # this is intended.
        if isNumerical(x) or isinstance(x, DataVector):
            return B.get(0, 0)
        elif isinstance(x, DataMatrix):
            return B.array()
def computeCoefficients(jgrid, grid, alpha, f):
    """
    Interpolate function f, which depends on some sparse grid function
    (grid, alpha), on jgrid.

    @param jgrid: Grid, new discretization
    @param grid: Grid, old discretization
    @param alpha: DataVector, surpluses for grid
    @param f: function, to be interpolated; called with a grid point of
              jgrid and the value of (grid, alpha) at that point
    @return: DataVector, surpluses for jgrid
    """
    jgs = jgrid.getStorage()
    numPoints = jgs.size()
    numDims = jgs.dim()

    # evaluate the old sparse grid function at the points of jgrid
    point = DataVector(numDims)
    coords = DataMatrix(numPoints, numDims)
    for i in xrange(numPoints):
        jgs.get(i).getCoords(point)
        coords.setRow(i, point)
    nodalValues = evalSGFunctionMulti(grid, alpha, coords)

    # apply f to all grid points
    jnodalValues = DataVector(numPoints)
    for i in xrange(len(nodalValues)):
        coords.getRow(i, point)
        jnodalValues[i] = f(point.array(), nodalValues[i])

    return hierarchize(jgrid, jnodalValues)
def __init__(self, **kwargs):
    """
    Create a data container in one of three ways, selected by the
    keyword arguments given:

      - adapter: DataAdapter; the content is taken from the container
        returned by adapter.loadData()
      - size: int, dim: int, name="train"; empty storage of that shape
      - points, values, name="train", filename=None; the given data
        (converted to DataMatrix / DataVector if necessary)

    @raise Exception: if no keyword argument is given, or if an
                      IndexError occurs while the container is set up
    """
    self.points = {}
    self.values = {}
    self.dataDict = {}
    self.specifications = {}

    # **kwargs is never None, so test for emptiness; the former
    # "is None" check could not fire and an empty call ended in an
    # AttributeError further down
    if not kwargs:
        raise Exception("Argument list is empty")

    try:
        if 'adapter' in kwargs:
            # takes (adapter: DataAdapter)
            adapter = kwargs['adapter']
            container = adapter.loadData()
            self.points = container.points
            self.values = container.values
            self.dim = container.dim
            self.size = container.size
            self.specifications = container.specifications
            self.name = container.name
        elif 'size' in kwargs and 'dim' in kwargs:
            # takes (size: int, dim: int, name="train")
            self.name = kwargs.get('name', self.TRAIN_CATEGORY)
            self.size = kwargs['size']
            self.dim = kwargs['dim']
            self.points[self.name] = DataMatrix(self.size, self.dim)
            self.values[self.name] = DataVector(self.size)
            specification = DataSpecification()
            specification.createNumericAttributes(self.dim)
            self.specifications[self.name] = specification
        elif 'points' in kwargs and 'values' in kwargs:
            # takes (points: DataVector, values: DataVector,
            #        name="train", filename=None)
            self.name = kwargs.get('name', self.TRAIN_CATEGORY)
            if isinstance(kwargs['points'], DataMatrix):
                self.points[self.name] = kwargs['points']
            else:
                self.points[self.name] = DataMatrix(kwargs['points'])
            if isinstance(kwargs['values'], DataVector):
                self.values[self.name] = kwargs['values']
            else:
                self.values[self.name] = DataVector(kwargs['values'])

            # creating dictionary for fast search point -> value
            self.dataDict[self.name] = {}
            p = DataVector(self.points[self.name].getNcols())
            for i in range(self.points[self.name].getNrows()):
                self.points[self.name].getRow(i, p)
                key = tuple(p.array())
                self.dataDict[self.name][key] = self.values[self.name][i]

            self.size = self.points[self.name].getNrows()
            self.dim = self.points[self.name].getNcols()

            specification = DataSpecification()
            specification.createNumericAttributes(self.dim)

            # if data comes from a file, note it in the specification
            filename = kwargs.get('filename', None)
            if filename is not None:
                specification.setFilename(filename)
                specification.setSaved()

            self.specifications[self.name] = specification

        self.tempPoint = DataVector(self.dim)
        self.tempValue = DataVector(1)
    except IndexError:
        raise Exception('Wrong or no attributes in constructor')
def doLearningIteration(self, points):
    """
    Interpolates the given points with the current grid

    Grid points for which no value is available are set to 0; each of
    them is printed together with a summary. Afterwards the
    interpolation property is checked.

    @param points: interpolation points, supports getDim(), membership
                   tests and lookup by coordinate tuple
    @return: Return hierarchical surpluses
    """
    gs = self.grid.getStorage()

    # assert that the number of dimensions of the data is the same
    # as the grids
    assert gs.dim() == points.getDim()

    nodalValues = DataVector(gs.size())
    nodalValues.setAll(0.0)

    # interpolation on nodal basis
    p = DataVector(gs.dim())
    cnt = 0
    for i in range(gs.size()):
        gs.get(i).getCoords(p)
        x = tuple(p.array())
        if x not in points:
            nodalValues[i] = 0.0
            print("%s %s" % (p, nodalValues[i]))
            cnt += 1
        else:
            nodalValues[i] = float(points[x])

    if cnt > 0:
        # The message used to be continued with a backslash inside the
        # string literal, which embedded the source indentation in the
        # printed text.
        print('%i/%i of the grid points have been set to 0'
              % (cnt, gs.size()))
        # NOTE(review): debugger breakpoint in library code -- blocks
        # non-interactive runs; consider raising or logging instead.
        pdb.set_trace()

    # hierarchization
    alpha = hierarchize(self.grid, nodalValues)

    # -----------------------------------------
    # check if interpolation property is given
    err, _ = checkInterpolation(self.grid, alpha, nodalValues,
                                epsilon=1e-12)
    if len(err) > 0:
        print("interpolation property not met")
        # NOTE(review): see the breakpoint note above.
        pdb.set_trace()
    # -----------------------------------------

    return alpha
def __estimate(self, vol, grid, alpha, U, T, f, npaths):
    """
    Compute npaths sample means of f applied to a sparse grid function.

    npaths * self.__n samples are requested from self.__getSamples, the
    sparse grid function is evaluated at all of them, f is applied to
    every (sample, function value) pair and the results are averaged in
    npaths consecutive batches of self.__n entries each.

    @param vol: not used in the computation
    @param grid: grid passed to evalSGFunctionMulti
    @param alpha: coefficients passed to evalSGFunctionMulti
    @param U: forwarded to self.__getSamples
    @param T: forwarded to self.__getSamples
    @param f: callable f(x, y); x is the array of a sample row and y the
              sparse grid function value at that sample
    @param npaths: number of batches, i.e. number of means computed
    @return: numpy array of length npaths containing the batch means
    """
    samples = self.__getSamples(U, T, npaths * self.__n)
    sgValues = evalSGFunctionMulti(grid, alpha, samples).array()

    # apply f sample by sample; row is reused as buffer for each sample
    fx = np.empty(len(sgValues), dtype='float')
    row = DataVector(samples.getNcols())
    for k, sgValue in enumerate(sgValues):
        samples.getRow(k, row)
        fx[k] = f(row.array(), sgValue)

    if self.__isPositive:
        fx = np.abs(fx)

    # average over consecutive batches of self.__n entries
    batchSize = self.__n
    mean = np.empty(npaths, dtype='float')
    for k in xrange(npaths):
        start = k * batchSize
        mean[k] = np.mean(fx[start:start + batchSize])
    return mean
class LibAGFDist(Dist):
    """
    Kernel density estimate distribution based on libAGF results.

    The density is described by the training samples and one bandwidth
    per dimension: pdf() evaluates a Gaussian product-kernel estimate,
    cdf() and ppf() use the (inverse) Rosenblatt transformation
    operations for KDEs. Pre-computed pdf values of test samples are
    kept in a dictionary and used by pdf_libagf(), mean() and var().
    """

    def __init__(self, trainData, samples=None, testData=None,
                 bandwidths=None, transformation=None, surfaceFile=None):
        """
        @param trainData: numpy array with the training samples, one
                          row per sample and one column per dimension
        @param samples: array of samples rvs() draws from
        @param testData: dictionary (sample tuple -> pdf value)
        @param bandwidths: kernel bandwidths; if None, they are obtained
                           from getOptKDEbdwth for the training data
        @param transformation: if given, the bounds are taken from its
                               transformations instead of [0, 1]
        @param surfaceFile: file plotted by gnuplot()
        """
        super(LibAGFDist, self).__init__()

        self.trainData = DataMatrix(trainData)
        self.testData = testData
        self.dim = trainData.shape[1]

        # default bounds: unit interval per dimension; a flat [0, 1]
        # in the one-dimensional case
        self.bounds = [[0, 1] for _ in xrange(self.dim)]
        if len(self.bounds) == 1:
            self.bounds = self.bounds[0]
        if transformation is not None:
            self.bounds = [trans.getBounds()
                           for trans in transformation.getTransformations()]

        self.samples = samples
        self.transformation = transformation

        if bandwidths is not None:
            self.bandwidths = bandwidths
        else:
            op = createOperationInverseRosenblattTransformationKDE(self.trainData)
            self.bandwidths = DataVector(self.dim)
            op.getOptKDEbdwth(self.bandwidths)

        self.surfaceFile = surfaceFile

    @classmethod
    def byConfig(cls, config):
        """
        Run the density estimation for a config file and load its results.

        @param config: path of the configuration file
        @return: LibAGFDist or None if config is None or does not exist
        """
        if config is None or not os.path.exists(config):
            return None

        # init density function
        traindatafile, samplefile, testFile, testOutFile, bandwidthFile, surfaceFile = \
            cls.computeDensity(config)
        return cls.byFiles(traindatafile, samplefile, testFile,
                           testOutFile, bandwidthFile, surfaceFile)

    @staticmethod
    def _loadColumnData(filename):
        """
        Load a text file with numpy; one dimensional data is
        transformed to a matrix with one column.
        """
        data = np.loadtxt(filename)
        if len(data.shape) == 1:
            data = np.array([data]).transpose()
        return data

    @classmethod
    def byFiles(cls, trainDataFile, samplesFile=None, testFile=None,
                testOutFile=None, bandwidthFile=None, surfaceFile=None):
        """
        Create the distribution from result files.

        All files but the training data file are optional; a file name
        that is None or does not exist is ignored.

        @param trainDataFile: text file with the training samples
        @param samplesFile: text file with samples for quadrature
        @param testFile: text file with test samples
        @param testOutFile: text file with the pdf values of the test samples
        @param bandwidthFile: text file with the bandwidths
        @param surfaceFile: surface file for gnuplot()
        @return: LibAGFDist
        @raise Exception: if the training data file does not exist
        """
        # load training file
        if not os.path.exists(trainDataFile):
            raise Exception('The training data file "%s" does not exist' % trainDataFile)
        trainData = cls._loadColumnData(trainDataFile)

        # load samples for quadrature
        samples = None
        if samplesFile is not None and os.path.exists(samplesFile):
            samples = cls._loadColumnData(samplesFile)

        # load test file for evaluating pdf values
        testData = None
        if testFile is not None and os.path.exists(testFile):
            testData = cls._loadColumnData(testFile)

        # load bandwidths file for evaluating pdf values
        bandwidths = None
        if bandwidthFile is not None and os.path.exists(bandwidthFile):
            bandwidths = np.loadtxt(bandwidthFile)

        # load pdf values for the test samples if available and store
        # them in a hash map; stays None if one of the two files is missing
        testDataEval = None
        if testData is not None and testOutFile is not None and \
                os.path.exists(testOutFile):
            # atleast_1d: a file with a single value is loaded as scalar
            testLikelihood = np.atleast_1d(np.loadtxt(testOutFile))
            testDataEval = {}
            for i, sample in enumerate(testData):
                testDataEval[tuple(sample)] = testLikelihood[i]

        if surfaceFile is not None and not os.path.exists(surfaceFile):
            surfaceFile = None

        return cls(trainData,
                   samples=samples,
                   testData=testDataEval,
                   bandwidths=bandwidths,
                   surfaceFile=surfaceFile)

    @classmethod
    def computeDensity(cls, config,
                       pathsgpp='/home/franzefn/workspace/SGppUQ/lib/sgpp',
                       cluster='/home/franzefn/Promotion/UQ/benjamin/clustc/cluster'):
        """
        Run the external density estimation and collect the names of
        the files referenced in the config file.

        @param config: path of the configuration file
        @param pathsgpp: value assigned to LD_LIBRARY_PATH for the run
        @param cluster: executable that is called with "-c <config>"
        @return: tuple (traindatafile, samplesfile, testFile,
                 testOutFile, bandwidthsfile, surfacefile); entries that
                 are not configured are None
        @raise Exception: if config does not exist or the executable
                          returns a non-zero status
        """
        if not os.path.exists(config):
            raise Exception('the config file "%s" does not exist' % config)

        # NOTE: this replaces LD_LIBRARY_PATH of the current process
        os.environ['LD_LIBRARY_PATH'] = pathsgpp
        # NOTE(review): cluster and config are interpolated into a shell
        # command line; paths containing blanks or shell meta characters
        # are not supported
        ret = os.system("%s -c %s > out_libagf.log" % (cluster, config))
        if ret != 0:
            raise Exception('The density estimation exited unexpectedly')

        # extract the file names from config
        s = cp.ConfigParser()
        s.optionxform = str
        s.read(config)

        traindatafile = s.get('files', 'inFileTrain')
        denestOptions = s.options('denest')
        fileOptions = s.options('files')

        # get() returns strings, so the number of samples has to be
        # converted before it can be compared with 0
        samplesfile = None
        if 'samplesNumberSamples' in denestOptions and \
                s.getfloat('denest', 'samplesNumberSamples') > 0 and \
                'samplesOutput' in denestOptions:
            samplesfile = s.get('denest', 'samplesOutput')

        testFile = None
        if 'inFileTest' in fileOptions:
            testFile = s.get('files', 'inFileTest')

        testOutFile = None
        if 'outFileTest' in fileOptions and 'inFileTest' in fileOptions:
            testOutFile = s.get('files', 'outFileTest')

        bandwidthsfile = None
        if 'printBandwidthsFile' in denestOptions:
            bandwidthsfile = s.get('denest', 'printBandwidthsFile')

        surfacefile = None
        if 'printSurfaceFile' in denestOptions:
            surfacefile = s.get('denest', 'printSurfaceFile')

        return traindatafile, samplesfile, testFile, testOutFile, bandwidthsfile, surfacefile

    def pdf_libagf(self, x):
        """
        Look up the pre-computed pdf value of a test sample.

        @param x: number or sequence of coordinates
        @return: stored pdf value
        @raise AttributeError: if no value is stored for x
        """
        if isNumerical(x):
            x = [x]
        x = tuple(x)

        if x in self.testData:
            return self.testData[x]
        raise AttributeError("No pdf value for '%s' available" % (x,))

    def pdf(self, x):
        """
        Evaluate the Gaussian product-kernel density estimate.

        @param x: point; it is subtracted from the training data array
                  and therefore has to broadcast against it
        @return: density value
        """
        n = self.trainData.getNrows()

        # the bandwidths are either a DataVector (computed in the
        # constructor) or plain numeric data (e.g. loaded from a file)
        sigma = self.bandwidths
        if hasattr(sigma, 'array'):
            sigma = sigma.array()
        sigma = np.atleast_1d(np.asarray(sigma, dtype='float'))

        # normalization coefficient
        norm = 1. / (sigma * np.sqrt(2. * np.pi))

        # normalized distances to all training samples
        z = (x - self.trainData.array()) / sigma
        kernels = norm * np.exp(-z ** 2 / 2.)

        # scale the result by the number of samples
        return np.sum(np.prod(kernels, axis=1)) / n

    @staticmethod
    def _prepareTransformation(x):
        """
        Convert the parameter to the right format and create the input
        and output matrix for a Rosenblatt transformation.

        @param x: number, list, DataVector or DataMatrix
        @return: tuple (x, A, B) with the converted parameter, the
                 input matrix and the zero-initialized output matrix
        @raise TypeError: if x has none of the supported types
        """
        if isList(x):
            x = DataVector(x)
        elif isNumerical(x):
            x = DataVector([x])

        if isinstance(x, DataMatrix):
            A = x
            B = DataMatrix(A.getNrows(), A.getNcols())
        elif isinstance(x, DataVector):
            A = DataMatrix(1, len(x))
            A.setRow(0, x)
            B = DataMatrix(1, len(x))
        else:
            raise TypeError("unsupported parameter type '%s'" % type(x).__name__)

        B.setAll(0.0)
        return x, A, B

    def cdf(self, x):
        """
        Apply the Rosenblatt transformation to x.

        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
                 complete result array for a DataMatrix
        """
        x, A, B = self._prepareTransformation(x)

        # do the transformation
        op = createOperationRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataVector):
            return B.get(0, 0)
        return B.array()

    def ppf(self, x):
        """
        Apply the inverse Rosenblatt transformation to x.

        @param x: number, list, DataVector or DataMatrix
        @return: entry (0, 0) of the result for vector-like input, the
                 complete result array for a DataMatrix
        """
        x, A, B = self._prepareTransformation(x)

        # do the transformation
        assert A.getNcols() == B.getNcols() == self.trainData.getNcols()
        op = createOperationInverseRosenblattTransformationKDE(self.trainData)
        op.doTransformation(A, B)

        # transform the outcome
        if isinstance(x, DataVector):
            return B.get(0, 0)
        return B.array()

    def rvs(self, n=1):
        """
        Draw n rows from the stored samples (with replacement).

        @param n: number of samples
        @return: array with the selected rows of self.samples
        """
        ixs = np.random.randint(0, len(self.samples), n)
        return self.samples[ixs, :]

    def mean(self, n=1e4):
        """
        Average of the products of the coordinates of all test samples.

        @param n: not used
        """
        moment = 0.
        for sample in self.testData:
            moment += np.prod(sample)
        return moment / len(self.testData)

    def var(self):
        """
        Sample variance (divisor: number of test samples - 1) of the
        products of the coordinates of all test samples.
        """
        mean = self.mean()
        moment = 0.
        for sample in self.testData:
            moment += (np.prod(sample) - mean) ** 2
        return moment / (len(self.testData) - 1)

    def getBounds(self):
        return self.bounds

    def getDim(self):
        return self.dim

    def getDistributions(self):
        return [self]

    def gnuplot(self, jpegFile, gnuplotConfig=None):
        """
        Write a gnuplot script that plots the surface file to a jpeg
        file and run gnuplot on it.

        @param jpegFile: name of the image file gnuplot writes
        @param gnuplotConfig: name of the script file; defaults to
                              'gnuplot.config'
        @raise Exception: if no existing surface file is available
        """
        if self.surfaceFile is None or not os.path.exists(self.surfaceFile):
            raise Exception('surface file not found. specify "printSurfaceFile" in [denest] section of config')

        gnuplot = """
        set terminal jpeg
        set output "%s"

        set view map
        set size ratio .9

        set object 1 rect from graph 0, graph 0 to graph 1, graph 1 back
        set object 1 rect fc rgb "black" fillstyle solid 1.0

        splot '%s' using 1:2:3 with points pointtype 5 pointsize 1 palette linewidth 0
        """

        if gnuplotConfig is None:
            gnuplotConfig = 'gnuplot.config'

        # the script has to be closed before gnuplot reads it
        with open(gnuplotConfig, "w") as fd:
            fd.write(gnuplot % (jpegFile, self.surfaceFile))

        os.system("gnuplot %s" % gnuplotConfig)

    def __str__(self):
        return "libAGF"