def createGrid(grid, dim, deg=1, addTruncatedBorder=False):
    """Create a fresh grid of the same basis family as *grid*.

    The new grid has dimensionality *dim*; for polynomial source grids the
    degree is raised to at least the source grid's degree.

    NOTE(review): *addTruncatedBorder* is never read in this body — confirm
    whether it is dead or reserved for a future extension.
    """
    # create new grid
    gridType = grid.getType()

    # keep at least the polynomial degree of the source grid
    if gridType in [Poly, PolyBoundary]:
        deg = max(deg, grid.getDegree())

    # print gridType, deg
    if deg > 1:
        # polynomial basis requested (or inherited from the source grid)
        if gridType in [LinearBoundary, PolyBoundary]:
            return Grid.createPolyBoundaryGrid(dim, deg)
        elif gridType == LinearL0Boundary:
            raise NotImplementedError("there is no full boundary polynomial grid")
        elif gridType in [Linear, Poly]:
            return Grid.createPolyGrid(dim, deg)
        else:
            raise Exception('unknown grid type %s' % gridType)
    else:
        # linear basis
        if gridType == Linear:
            return Grid.createLinearGrid(dim)
        elif gridType == LinearBoundary:
            return Grid.createLinearBoundaryGrid(dim)
        elif gridType == LinearL0Boundary:
            # boundary grid with level-0 boundary points
            return Grid.createLinearBoundaryGrid(dim, 0)
        else:
            raise Exception('unknown grid type %s' % gridType)
def createGrid(grid, dim, deg=1, addTruncatedBorder=False):
    """Create a fresh grid of the same basis family as *grid* (duplicate of
    the sibling createGrid above, differing only in formatting).

    NOTE(review): *addTruncatedBorder* is never read in this body.
    """
    # create new grid
    gridType = grid.getType()

    # keep at least the polynomial degree of the source grid
    if gridType in [Poly, PolyBoundary]:
        deg = max(deg, grid.getDegree())

    # print gridType, deg
    if deg > 1:
        if gridType in [LinearBoundary, PolyBoundary]:
            return Grid.createPolyBoundaryGrid(dim, deg)
        elif gridType == LinearL0Boundary:
            raise NotImplementedError(
                "there is no full boundary polynomial grid")
        elif gridType in [Linear, Poly]:
            return Grid.createPolyGrid(dim, deg)
        else:
            raise Exception('unknown grid type %s' % gridType)
    else:
        if gridType == Linear:
            return Grid.createLinearGrid(dim)
        elif gridType == LinearBoundary:
            return Grid.createLinearBoundaryGrid(dim)
        elif gridType == LinearL0Boundary:
            # boundary grid with level-0 boundary points
            return Grid.createLinearBoundaryGrid(dim, 0)
        else:
            raise Exception('unknown grid type %s' % gridType)
def testOperationTest_test(self):
    """Exercise OperationTest on a 1d level-1 linear boundary grid."""
    from pysgpp import Grid, DataVector, DataMatrix
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(1)

    alpha = DataVector(factory.getStorage().size())

    # single test sample at x = 0.25 with class label +1
    data = DataMatrix(1, 1)
    data.setAll(0.25)
    classes = DataVector(1)
    classes.setAll(1.0)

    testOP = factory.createOperationTest()

    # positive surplus on the interior point -> sample classified correctly
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = 1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c > 0.0)

    # negative surplus -> sample misclassified, zero hits
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = -1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c == 0.0)
def testOperationB(self):
    """Check OperationB mult/multTranspose on a 1d level-2 boundary grid."""
    from pysgpp import Grid, DataVector, DataMatrix
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(2)

    alpha = DataVector(factory.getStorage().size())
    p = DataMatrix(1, 1)
    beta = DataVector(1)

    # mult: accumulate basis-function values at x = 0.25 weighted by beta
    alpha.setAll(0.0)
    p.set(0, 0, 0.25)
    beta[0] = 1.0

    opb = factory.createOperationB()
    opb.mult(beta, p, alpha)

    # reference values of the five basis functions at x = 0.25
    self.failUnlessAlmostEqual(alpha[0], 0.75)
    self.failUnlessAlmostEqual(alpha[1], 0.25)
    self.failUnlessAlmostEqual(alpha[2], 0.5)
    self.failUnlessAlmostEqual(alpha[3], 1.0)
    self.failUnlessAlmostEqual(alpha[4], 0.0)

    # multTranspose: evaluate the function with surplus only on point 2
    alpha.setAll(0.0)
    alpha[2] = 1.0
    p.set(0, 0, 0.25)
    beta[0] = 0.0

    opb.multTranspose(alpha, p, beta)
    self.failUnlessAlmostEqual(beta[0], 0.5)
def testRefinement2d_two(self):
    """Refine a 2d boundary grid twice and check the resulting point count."""
    from pysgpp import Grid, DataVector, SurplusRefinementFunctor
    factory = Grid.createLinearBoundaryGrid(2)
    storage = factory.getStorage()

    gen = factory.createGridGenerator()
    gen.regular(0)

    # first refinement: only point 0 carries a surplus
    alpha = DataVector(4)
    for i in xrange(len(alpha)):
        alpha[i] = 0.0
    alpha[0] = 1.0
    func = SurplusRefinementFunctor(alpha)
    gen.refine(func)

    # second refinement: surplus on point 4 of the enlarged grid
    alpha2 = DataVector(8)
    for i in xrange(len(alpha2)):
        alpha2[i] = 0.0
    alpha2[4] = 1.0
    func = SurplusRefinementFunctor(alpha2)
    gen.refine(func)

    # expected grid size after both refinement steps
    self.failUnlessEqual(storage.size(), 13)
def testSerializationLinearBoudaryBoundingBox(self):
    """Uses Linear grid for tests"""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(2)
    self.failIfEqual(factory, None)

    gen = factory.createGridGenerator()
    gen.regular(3)

    # stretch dimension 0 to [0, 100] before serializing
    boundingBox = factory.getBoundingBox()
    tempBound = boundingBox.getBoundary(0)
    tempBound.leftBoundary = 0.0
    tempBound.rightBoundary = 100.0
    tempBound.bDirichletLeft = False
    tempBound.bDirichletRight = False
    boundingBox.setBoundary(0, tempBound)

    # round-trip through the string representation
    # NOTE: 'str' shadows the builtin; kept as-is for token fidelity
    str = factory.serialize()
    self.assert_(len(str) > 0)

    newfac = Grid.unserialize(str)
    self.failIfEqual(newfac, None)
    self.assertEqual(factory.getStorage().size(), newfac.getStorage().size())

    # the modified bounding box must survive serialization
    boundingBox = newfac.getBoundingBox()
    tempBound = boundingBox.getBoundary(0)
    self.assertEqual(0.0, tempBound.leftBoundary)
    self.assertEqual(100.0, tempBound.rightBoundary)
    self.assertEqual(False, tempBound.bDirichletLeft)
    self.assertEqual(False, tempBound.bDirichletRight)
def testSerializationLinearBoudaryWithLeaf(self):
    """Uses Linear grid for tests"""
    from pysgpp import Grid
    srcLeaf = []
    factory = Grid.createLinearBoundaryGrid(2)
    self.failIfEqual(factory, None)

    gen = factory.createGridGenerator()
    gen.regular(3)

    # remember the leaf flag of every grid point before serialization
    for i in xrange(factory.getStorage().size()):
        srcLeaf.append(factory.getStorage().get(i).isLeaf())

    str = factory.serialize()
    self.assert_(len(str) > 0)

    newfac = Grid.unserialize(str)
    self.failIfEqual(newfac, None)
    self.assertEqual(factory.getStorage().size(), newfac.getStorage().size())

    # leaf flags must survive the serialize/unserialize round trip
    for i in xrange(factory.getStorage().size()):
        self.failUnlessEqual(newfac.getStorage().get(i).isLeaf(), srcLeaf[i])
def testSerializationLinearBoudaryBoundingBox(self):
    """Uses Linear grid for tests"""
    from pysgpp import Grid

    grid = Grid.createLinearBoundaryGrid(2)
    self.failIfEqual(grid, None)
    grid.createGridGenerator().regular(3)

    # stretch dimension 0 to [0, 100] before serializing
    box = grid.getBoundingBox()
    bound = box.getBoundary(0)
    bound.leftBoundary = 0.0
    bound.rightBoundary = 100.0
    bound.bDirichletLeft = False
    bound.bDirichletRight = False
    box.setBoundary(0, bound)

    # round-trip through the string representation
    serialized = grid.serialize()
    self.assert_(len(serialized) > 0)

    restored = Grid.unserialize(serialized)
    self.failIfEqual(restored, None)
    self.assertEqual(grid.getStorage().size(), restored.getStorage().size())

    # the modified bounding box must survive the round trip
    restoredBound = restored.getBoundingBox().getBoundary(0)
    self.assertEqual(0.0, restoredBound.leftBoundary)
    self.assertEqual(100.0, restoredBound.rightBoundary)
    self.assertEqual(False, restoredBound.bDirichletLeft)
    self.assertEqual(False, restoredBound.bDirichletRight)
def testOperationB(self):
    """Check OperationB mult/multTranspose on a 1d level-2 boundary grid."""
    from pysgpp import Grid, DataVector, DataMatrix

    grid = Grid.createLinearBoundaryGrid(1)
    grid.createGridGenerator().regular(2)

    coeffs = DataVector(grid.getStorage().size())
    point = DataMatrix(1, 1)
    weights = DataVector(1)

    # mult: accumulate basis-function values at x = 0.25 weighted by weights
    coeffs.setAll(0.0)
    point.set(0, 0, 0.25)
    weights[0] = 1.0

    opb = grid.createOperationB()
    opb.mult(weights, point, coeffs)

    # reference values of the five basis functions at x = 0.25
    for idx, expected in enumerate([0.75, 0.25, 0.5, 1.0, 0.0]):
        self.failUnlessAlmostEqual(coeffs[idx], expected)

    # multTranspose: evaluate the function with surplus only on point 2
    coeffs.setAll(0.0)
    coeffs[2] = 1.0
    point.set(0, 0, 0.25)
    weights[0] = 0.0

    opb.multTranspose(coeffs, point, weights)
    self.failUnlessAlmostEqual(weights[0], 0.5)
def testOperationTest_test(self):
    """Exercise OperationTest on a 1d level-1 linear boundary grid."""
    from pysgpp import Grid, DataVector, DataMatrix
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(1)

    alpha = DataVector(factory.getStorage().size())

    # single test sample at x = 0.25 with class label +1
    data = DataMatrix(1, 1)
    data.setAll(0.25)
    classes = DataVector(1)
    classes.setAll(1.0)

    testOP = factory.createOperationTest()

    # positive surplus on the interior point -> sample classified correctly
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = 1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c > 0.0)

    # negative surplus -> sample misclassified, zero hits
    alpha[0] = 0.0
    alpha[1] = 0.0
    alpha[2] = -1.0
    c = testOP.test(alpha, data, classes)
    self.failUnless(c == 0.0)
def test34(self):
    """Full-grid test: linear boundary grid, dim 2, level 9.

    NOTE(review): FullGrid and FullGridSet are imported but not referenced
    in this body — possibly needed indirectly by testFG; confirm.
    """
    from pysgpp import Grid, DataVector, FullGrid, FullGridSet
    dim = 2
    level = 9
    function = buildParableBoundary(dim)
    grid = Grid.createLinearBoundaryGrid(dim)
    testFG(self, grid, level, function)
def testHierarchisationDBoundary(self):
    """Hierarchisation round trip on a 3d level-5 linear boundary grid."""
    from pysgpp import Grid
    dim = 3
    level = 5
    function = buildParableBoundary(dim)
    grid = Grid.createLinearBoundaryGrid(dim)
    testHierarchisationDehierarchisation(self, grid, level, function)
def testHatRegulardD_two(self):
    """Compare the Laplace stiffness matrix of a 3d boundary grid against
    a stored reference matrix."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(3)
    m = generateLaplaceMatrix(factory, 4)
    m_ref = readReferenceMatrix(self, factory.getStorage(), 'data/C_laplace_phi_li_hut_l0_rand_dim_3_nopsgrid_297_float.dat.gz')

    # compare
    compareStiffnessMatrices(self, m, m_ref)
def testHatRegular1D_two(self):
    """Compare the BT matrix of a 1d level-5 boundary grid against a
    stored reference matrix."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(1)
    training = buildTrainingVector(readDataVector('data/data_dim_1_nops_8_float.arff.gz'))
    level = 5
    gen = factory.createGridGenerator()
    gen.regular(level)

    m = generateBTMatrix(factory, training)
    m_ref = readReferenceMatrix(self, factory.getStorage(), 'data/BT_phi_li_hut_trapezrand_dim_1_nopsgrid_33_float.dat.gz')

    # compare
    compareBTMatrices(self, m, m_ref)
def testHatRegulardD_two(self):
    """Compare the BT matrix of a 3d level-3 boundary grid against a
    stored reference matrix (newer getGenerator API variant)."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(3)
    training = buildTrainingVector(readDataVector('../datasets/BT_BBT/data_dim_3_nops_512_float.arff.gz'))
    level = 3
    gen = factory.getGenerator()
    gen.regular(level)

    m = generateBTMatrix(factory, training)
    m_ref = readReferenceMatrix(self, factory.getStorage(), '../datasets/BT_BBT/BT_phi_li_hut_trapezrand_dim_3_nopsgrid_225_float.dat.gz')

    # compare
    compareBTMatrices(self, m, m_ref)
def testGeneration(self):
    """Generate a regular 2d boundary grid and verify its size; a second
    generation attempt on the same grid must raise."""
    from pysgpp import Grid, DataVector
    factory = Grid.createLinearBoundaryGrid(2)
    storage = factory.getStorage()

    gen = factory.createGridGenerator()
    self.failIfEqual(gen, None)

    # grid is empty before generation
    self.failUnlessEqual(storage.size(), 0)
    gen.regular(3)
    self.failUnlessEqual(storage.size(), 37)

    # This should fail
    self.failUnlessRaises(Exception, gen.regular, 3)
def testHatRegular1D_one(self):
    """Compare the BT matrix of a 1d level-4 grid with level-0 boundary
    points against a stored reference matrix."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(1, 0)
    training = buildTrainingVector(readDataVector('data/data_dim_1_nops_8_float.arff.gz'))
    level = 4
    gen = factory.createGridGenerator()
    gen.regular(level)

    m = generateBTMatrix(factory, training)
    m_ref = readReferenceMatrix(self, factory.getStorage(), 'data/BT_phi_li_hut_l0_rand_dim_1_nopsgrid_17_float.dat.gz')

    # compare
    compareBTMatrices(self, m, m_ref)
def testHatRegulardD_two(self):
    """Compare the BBT matrix of a 3d level-3 boundary grid against a
    stored reference matrix."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(3)
    training = buildTrainingVector(readDataVector('data/data_dim_3_nops_512_float.arff.gz'))
    level = 3
    gen = factory.createGridGenerator()
    gen.regular(level)

    m = generateBBTMatrix(factory, training)
    m_ref = readReferenceMatrix(self, factory.getStorage(), 'data/BBT_phi_li_hut_trapezrand_dim_3_nopsgrid_225_float.dat.gz')

    # compare
    compareBBTMatrices(self, m, m_ref)
def testOperationEval_eval(self):
    """Evaluate the constant-1-surplus function at x = 0.25 on a 1d
    level-1 boundary grid.

    NOTE: local name 'eval' shadows the builtin; kept for token fidelity.
    """
    from pysgpp import Grid, DataVector
    factory = Grid.createLinearBoundaryGrid(1)
    gen = factory.createGridGenerator()
    gen.regular(1)

    # all surpluses set to one
    alpha = DataVector(factory.getStorage().size())
    alpha.setAll(1.0)

    p = DataVector(1)
    p.setAll(0.25)

    eval = factory.createOperationEval()
    self.failUnlessAlmostEqual(eval.eval(alpha, p), 1.5)
def eval_fullGrid(level, dim, border=True):
    """Return an (numPoints, dim) array with the coordinates of every
    point of a full grid of the given level, with or without boundary."""
    g = Grid.createLinearBoundaryGrid(dim, 1) if border \
        else Grid.createLinearGrid(dim)
    g.getGenerator().full(level)

    storage = g.getStorage()
    numPoints = storage.getSize()

    coords = np.ndarray((numPoints, dim))
    buf = DataVector(dim)
    for idx in range(numPoints):
        # fetch the unit-cube coordinates of point idx into buf
        storage.getCoordinates(storage.getPoint(idx), buf)
        coords[idx, :] = buf.array()
    return coords
def eval_fullGrid(level, dim, border=True):
    """Return a DataMatrix with the coordinates of every point of a full
    grid of the given level (legacy pysgpp API variant)."""
    if border:
        grid = Grid.createLinearBoundaryGrid(dim)
    else:
        grid = Grid.createLinearGrid(dim)

    grid.createGridGenerator().full(level)
    gs = grid.getStorage()

    # one row per grid point, one column per dimension
    ans = DataMatrix(gs.size(), dim)
    p = DataVector(dim)
    for i in xrange(gs.size()):
        gs.get(i).getCoords(p)
        ans.setRow(i, p)

    return ans
def testSerializationLinearBoudary(self):
    """Uses Linear grid for tests"""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(2)
    self.failIfEqual(factory, None)

    gen = factory.createGridGenerator()
    gen.regular(3)

    # serialize and restore; the point count must be preserved
    str = factory.serialize()
    self.assert_(len(str) > 0)

    newfac = Grid.unserialize(str)
    self.failIfEqual(newfac, None)
    self.assertEqual(factory.getStorage().size(), newfac.getStorage().size())
def testHatRegulardD_one(self):
    """Compare the BBT matrix of a 3d level-3 grid with level-0 boundary
    points against a stored reference matrix."""
    from pysgpp import Grid
    factory = Grid.createLinearBoundaryGrid(3, 0)
    training = buildTrainingVector(
        readDataVector('data/data_dim_3_nops_512_float.arff.gz'))
    level = 3
    gen = factory.createGridGenerator()
    gen.regular(level)

    m = generateBBTMatrix(factory, training)
    m_ref = readReferenceMatrix(
        self, factory.getStorage(),
        'data/BBT_phi_li_hut_l0_rand_dim_3_nopsgrid_123_float.dat.gz')

    # compare
    compareBBTMatrices(self, m, m_ref)
def createGrid(dim, level, borderType, isFull=False):
    """Create a grid of the requested border type and generate a regular
    (or full, if *isFull*) point set of the given level."""
    from pysgpp.extensions.datadriven.learner.Types import BorderTypes

    # dispatch table: border type -> grid factory
    factories = {
        BorderTypes.NONE:
            lambda: Grid.createLinearGrid(dim),
        BorderTypes.TRAPEZOIDBOUNDARY:
            lambda: Grid.createLinearTrapezoidBoundaryGrid(dim),
        BorderTypes.COMPLETEBOUNDARY:
            lambda: Grid.createLinearBoundaryGrid(dim, 0),
    }
    if borderType not in factories:
        raise Exception('Unknown border type')
    grid = factories[borderType]()

    # create regular grid of level accLevel
    generator = grid.createGridGenerator()
    if isFull:
        generator.full(level)
    else:
        generator.regular(level)

    return grid
def createGrid(dim, level, borderType, isFull=False):
    """Create a grid of the requested border type and generate a regular
    (or full, if *isFull*) point set of the given level
    (newer getGenerator API variant)."""
    from pysgpp.extensions.datadriven.learner.Types import BorderTypes
    if borderType == BorderTypes.NONE:
        grid = Grid.createLinearGrid(dim)
    elif borderType == BorderTypes.TRAPEZOIDBOUNDARY:
        grid = Grid.createLinearTrapezoidBoundaryGrid(dim)
    elif borderType == BorderTypes.COMPLETEBOUNDARY:
        # boundary grid with level-0 boundary points
        grid = Grid.createLinearBoundaryGrid(dim, 0)
    else:
        raise Exception('Unknown border type')

    # create regular grid of level accLevel
    gridGen = grid.getGenerator()
    if isFull:
        gridGen.full(level)
    else:
        gridGen.regular(level)

    return grid
def interpolate(f, level, dim, gridType=GridType_Linear, deg=2, trans=None):
    """Interpolate *f* on a regular sparse grid of the given type and level.

    @param f: callable; receives a coordinate array (or a scalar for dim 1)
    @param level: regular grid level
    @param dim: dimensionality
    @param gridType: one of the supported GridType_* constants
    @param deg: polynomial degree (used for poly grid types only)
    @param trans: optional transformation from the unit cube to the
                  probabilistic space before evaluating f
    @return: (grid, alpha) — the grid and its hierarchical coefficients
    @raise AttributeError: for unsupported grid types
    """
    # create a two-dimensional piecewise bi-linear grid
    if gridType == GridType_PolyBoundary:
        grid = Grid.createPolyBoundaryGrid(dim, deg)
    elif gridType == GridType_Poly:
        grid = Grid.createPolyGrid(dim, deg)
    elif gridType == GridType_Linear:
        grid = Grid.createLinearGrid(dim)
    elif gridType == GridType_LinearBoundary:
        grid = Grid.createLinearBoundaryGrid(dim, 1)
    else:
        raise AttributeError

    gridStorage = grid.getStorage()

    # create regular grid
    grid.getGenerator().regular(level)

    # create coefficient vector
    alpha = DataVector(gridStorage.getSize())
    alpha.setAll(0.0)

    # set function values in alpha
    x = DataVector(dim)
    for i in range(gridStorage.getSize()):
        gp = gridStorage.getPoint(i)
        gridStorage.getCoordinates(gp, x)
        p = x.array()
        if trans is not None:
            # map unit-cube coordinates into the probabilistic space
            p = trans.unitToProbabilistic(p)
        if gridStorage.getDimension() == 1:
            # 1d functions receive a scalar, not a length-1 array
            p = p[0]
        alpha[i] = f(p)

    # hierarchize
    createOperationHierarchisation(grid).doHierarchisation(alpha)

    return grid, alpha
def estimate_density(self, plot=False, c=1.1): # load two moons data set samples = np.loadtxt("data/moon.csv") xmin = c * samples[0, :].min() xmax = c * samples[0, :].max() ymin = c * samples[1, :].min() ymax = c * samples[1, :].max() bounds = np.array([[xmin, xmax], [ymin, ymax]]) grid = Grid.createLinearBoundaryGrid(2) grid.getGenerator().regular(0) alpha = np.ones(grid.getSize()) / 3. dist = SGDEdist(grid, alpha, bounds=np.array([[-2, 1], [0, 1]]), unitIntegrand=False, isPositive=True) # dist = SGDEdist.byLearnerSGDEConfig(samples.T, # bounds=bounds, # config={"grid_level": 7, # "grid_type": "linear", # "grid_maxDegree": 1, # "refinement_numSteps": 0, # "refinement_numPoints": 10, # "solver_threshold": 1e-10, # "solver_verbose": False, # "regularization_type": "Laplace", # "crossValidation_enable": False, # "crossValidation_lambda": 3.16228e-06, # "crossValidation_kfold": 5, # "crossValidation_silent": True, # "sgde_makePositive": True, # "sgde_makePositive_candidateSearchAlgorithm": "joined", # "sgde_makePositive_interpolationAlgorithm": "setToZero", # "sgde_unitIntegrand": True}) return dist
def discretizeFunction(f, bounds, level=2, hasBorder=False, *args, **kws):
    """Discretize *f* on a regular sparse grid over the given bounds.

    @param f: callable evaluated in the probabilistic space
    @param bounds: list of (lower, upper) limits, one per dimension
    @param level: regular grid level
    @param hasBorder: use a boundary grid if True
    @return: (grid, alpha, err) — grid, hierarchical coefficients and the
             estimated discrete L2 error
    """
    # define linear transformation to the unit hyper cube
    T = JointTransformation()
    for xlim in bounds:
        T.add(LinearTransformation(xlim[0], xlim[1]))

    # create grid
    dim = len(bounds)

    # create adequate grid
    if hasBorder:
        grid = Grid.createLinearBoundaryGrid(dim)
    else:
        grid = Grid.createLinearGrid(dim)

    # init storage
    grid.createGridGenerator().regular(level)
    gs = grid.getStorage()

    # discretize on given level
    p = DataVector(dim)
    nodalValues = DataVector(gs.size())
    for i in xrange(gs.size()):
        gs.get(i).getCoords(p)
        # transform to the right space
        q = T.unitToProbabilistic(p.array())
        # apply the given function
        nodalValues[i] = float(f(q))

    # hierarchize
    alpha = hierarchize(grid, nodalValues)

    # estimate the l2 error
    err = estimateDiscreteL2Error(grid, alpha, f)

    # TODO: adaptive refinement
    return grid, alpha, err
def estimateDensitySGDE(trainSamplesUnit,
                        testSamplesUnit=None,
                        testSamplesProb=None,
                        pathResults="/tmp",
                        dist=None,
                        optimization='l2',
                        iteration=0,
                        levels=[1, 2, 3, 4, 5],
                        refNr=0, refPoints=0,
                        nSamples=1000):
    """
    Estimates a sparse grid density for different levels and refinements by
    optimizing over a given quantity.

    NOTE(review): 'levels' is a mutable default argument; it is only read
    here, but confirm no caller mutates it.

    @param trainSamplesUnit: training samples in the unit hyper cube
    @param testSamplesUnit: test samples in the unit hyper cube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for l2/KL measures (optional)
    @param optimization: criterion to pick the best density
                         ('l2', 'kldivergence' or 'crossEntropy')
    @param iteration: iteration index used in file names
    @param levels: list of regular grid levels to try
    @param refNr: number of refinement steps
    @param refPoints: number of refinement points
    @param nSamples: number of samples the estimator should draw
    @return: the best SGDEdist found according to *optimization*
    """
    # configuration template for the external 'dmest' density estimator
    config = """
[general]
method = dmest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[dmest]
gridFile = %s
lambda = -1 # 0.01
regType=Laplace
refNr = %i
refPoints = %i
writeGridFile = %s
writeAlphaFile = %s
samp_rejectionTrialMax = 5000
samp_numSamples = %i
samp_outFile = %s
printSurfaceFile = %s
"""

    # write the samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
        usingTrainTag = "%i" % dim
    else:
        n, dim = trainSamplesUnit.shape
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(pathResults,
                                        "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(pathResults,
                                           "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # collector arrays
    accGridSizes = np.array([])
    accLevels = np.array([])
    accL2error = np.array([])
    accCrossEntropy = np.array([])
    accKLDivergence = np.array([])

    # best estimation
    ans = None
    bestMeasure = 1e20
    bestSetting = None

    for level in levels:
        # define output files
        gridFile = os.path.join(pathResults,
                                "samples_%i_%i_l%i.grid" % (iteration, n, level))
        alphaFile = os.path.join(pathResults,
                                 "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level))
        sampleFile = os.path.join(pathResults,
                                  "samples_%i_%i_l%i.csv" % (iteration, n, level))

        likelihoodFile = ""
        if testSamplesUnit is not None:
            likelihoodFile = os.path.join(pathResults,
                                          "samples_%i_%i_l%i_likelihood.csv" % (iteration, n, level))

        surfaceFile = ""
        if dim == 2:
            surfaceFile = os.path.join(pathResults,
                                       "samples_%i_%i_l%i.xyz" % (iteration, n, level))
        gnuplotJpegFile = os.path.join(pathResults,
                                       "samples_%i_%i_l%i_gnuplot.jpg" % (iteration, n, level))
        sgdeJpegFile = os.path.join(pathResults,
                                    "samples_%i_%i_l%i_sgde.jpg" % (iteration, n, level))
        sgdePositiveJpegFile = os.path.join(pathResults,
                                            "samples_%i_%i_l%i_sgdePositive.jpg" % (iteration, n, level))
        configFile = os.path.join(pathResults,
                                  "sgde_%i_%i_l%i.cfg" % (iteration, n, level))
        gnuplotConfig = os.path.join(pathResults,
                                     "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level))

        # generate the grid
        grid = Grid.createLinearBoundaryGrid(dim)
        grid.createGridGenerator().regular(level)

        # skip levels with more grid points than training samples
        if grid.getSize() <= n:
            print " l=%i" % level,
            fd = open(gridFile, "w")
            fd.write(grid.serialize())
            fd.close()

            # write config to file
            fd = open(configFile, "w")
            fd.write(config % (trainSamplesUnitFile,
                               usingTrainTag,
                               testSamplesUnitFile,
                               likelihoodFile,
                               usingTestTag,
                               gridFile,
                               refNr, refPoints,
                               gridFile,
                               alphaFile,
                               nSamples,
                               sampleFile,
                               surfaceFile))
            fd.close()

            # run the estimator on the written configuration
            sgdeDist = SGDEdist.byConfig(configFile)
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            # -----------------------------------------------------------
            # do some plotting
            if dim == 2:
                # gnuplot
                sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig)
                # -----------------------------------------------------------
                # matplotlib
                l2error = np.NAN
                kldivergence = np.NAN
                crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
                if dist is not None:
                    l2error = dist.l2error(sgdeDist, testSamplesUnit, testSamplesProb)
                    kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit, testSamplesProb)

                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(),
                           doQuadrature(grid, alpha),
                           kldivergence,
                           crossEntropy,
                           l2error))
                fig.savefig(sgdeJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # copy grid and coefficients
            gridFileNew = os.path.join(pathResults,
                                       "samples_%i_%i_sgde.grid" % (iteration, n))
            alphaFileNew = os.path.join(pathResults,
                                        "samples_%i_%i_sgde.alpha.arff" % (iteration, n))
            sampleFileNew = os.path.join(pathResults,
                                         "samples_%i_%i_sgde.csv" % (iteration, n))
            copy2(gridFile, gridFileNew)
            copy2(alphaFile, alphaFileNew)
            copy2(sampleFile, sampleFileNew)
            # -----------------------------------------------------------
            # # make it positive and do all over again
            # opPositive = OperationMakePositive(sgdeDist.grid)
            # alg = EstimateDensityAlgorithm(configFile)
            # opPositive.setInterpolationAlgorithm(alg)
            # grid, alpha = opPositive.makePositive(sgdeDist.alpha)

            # scale to unit integrand
            alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha))
            sgdeDist.grid = grid
            sgdeDist.alpha = alpha

            gridFileNew = os.path.join(pathResults,
                                       "samples_%i_%i_l%i_positive.grid" % (iteration, n, level))
            alphaFileNew = os.path.join(pathResults,
                                        "samples_%i_%i_l%i_positive.alpha.arff" % (iteration, n, level))
            fd = open(gridFileNew, "w")
            fd.write(Grid.serialize(grid))
            fd.close()
            writeAlphaARFF(alphaFileNew, alpha)
            # -----------------------------------------------------------
            # collect statistics
            accGridSizes = np.append(accGridSizes, grid.getSize())
            accLevels = np.append(accLevels, level)

            l2error = np.NAN
            kldivergence = np.NAN
            crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
            if dist is not None:
                l2error = dist.l2error(sgdeDist, testSamplesUnit, testSamplesProb)
                kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit, testSamplesProb)

            accL2error = np.append(accL2error, l2error)
            accCrossEntropy = np.append(accCrossEntropy, crossEntropy)
            accKLDivergence = np.append(accKLDivergence, kldivergence)

            if dim == 2:
                # -----------------------------------------------------------
                # do some plotting
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(),
                           doQuadrature(grid, alpha),
                           kldivergence,
                           crossEntropy,
                           l2error))
                fig.savefig(sgdePositiveJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            if optimization == 'crossEntropy':
                measure = crossEntropy
            elif optimization == 'kldivergence':
                measure = kldivergence
            elif optimization == 'l2':
                measure = l2error
            else:
                raise AttributeError('optimization "%s" is not known for density estimation' % optimization)

            isBest = measure < bestMeasure
            if isBest:
                bestMeasure = measure

            if ans is None or isBest:
                ans = sgdeDist
                bestSetting = {'level': level,
                               'gridSize': grid.getSize(),
                               'l2error': l2error,
                               'KLDivergence': kldivergence,
                               'crossEntropy': crossEntropy}
                # -----------------------------------------------------------
                # copy grid and coefficients
                gridFileNew = os.path.join(pathResults,
                                           "samples_%i_%i.grid" % (iteration, n))
                alphaFileNew = os.path.join(pathResults,
                                            "samples_%i_%i.alpha.arff" % (iteration, n))
                sampleFileNew = os.path.join(pathResults,
                                             "samples_%i_%i.csv" % (iteration, n))
                copy2(gridFile, gridFileNew)
                copy2(alphaFile, alphaFileNew)
                copy2(sampleFile, sampleFileNew)

                gridFileNew = os.path.join(pathResults,
                                           "samples_%i_%i_positive.grid" % (iteration, n))
                alphaFileNew = os.path.join(pathResults,
                                            "samples_%i_%i_positive.alpha.arff" % (iteration, n))
                fd = open(gridFileNew, "w")
                fd.write(Grid.serialize(ans.grid))
                fd.close()
                writeAlphaARFF(alphaFileNew, ans.alpha)
            # -----------------------------------------------------------
            print ": %s = %g <= %g" % (optimization, measure, bestMeasure)
    print
    # -----------------------------------------------------------
    # write results to file
    statsfilename = os.path.join(pathResults,
                                 "sg_sgde_%i_%i_all.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n] * len(accGridSizes),
                                                 accGridSizes,
                                                 accLevels,
                                                 accL2error,
                                                 accKLDivergence,
                                                 accCrossEntropy)).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -----------------------------------------------------------
    statsfilename = os.path.join(pathResults,
                                 "sg_sgde_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({'filename': statsfilename,
                   'data': DataMatrix(np.vstack(([n],
                                                 bestSetting['gridSize'],
                                                 bestSetting['level'],
                                                 bestSetting['l2error'],
                                                 bestSetting['KLDivergence'],
                                                 bestSetting['crossEntropy'])).transpose()),
                   'names': ['sampleSize', 'gridSize', 'level', 'l2error',
                             'KLDivergence', 'crossEntropy']})
    # -----------------------------------------------------------
    return ans
def createGrid(self):
    """
    Creates the specified grid

    Deserializes from self.__file when present; otherwise builds a grid
    from the configured dimension, degree, border type and level, then
    (optionally) copies all points of a template grid (self.__grid) in.
    """
    grid = None
    if self.__file is not None and os.path.exists(self.__file):
        # a serialized grid file takes precedence over all other settings
        gridFormatter = GridFormatter()
        grid = gridFormatter.deserializeFromFile(self.__file)
    else:
        if self.__grid is not None:
            # inherit dimensionality from the template grid
            self.__dim = self.__grid.getStorage().dim()

        if (self.__dim is None or self.level is None) and self.__grid is None:
            raise AttributeError("Not all attributes assigned to create\
 grid")

        if self.__border is not None:
            if self.__border == BorderTypes.TRAPEZOIDBOUNDARY:
                if self.__deg > 1:
                    grid = Grid.createPolyBoundaryGrid(self.__dim, self.__deg)
                else:
                    grid = Grid.createLinearBoundaryGrid(self.__dim)
            elif self.__border == BorderTypes.COMPLETEBOUNDARY:
                if self.__deg > 1:
                    raise NotImplementedError()
                else:
                    grid = Grid.createLinearBoundaryGrid(self.__dim, 0)
            else:
                # fall back to modified basis functions
                if self.__deg > 1:
                    grid = Grid.createModPolyGrid(self.__dim, self.__deg)
                else:
                    grid = Grid.createModLinearGrid(self.__dim)
        else:
            # no border points
            if self.__deg > 1:
                grid = Grid.createPolyGrid(self.__dim, self.__deg)
            else:
                grid = Grid.createLinearGrid(self.__dim)

        # generate the grid
        if self.level is not None:
            generator = grid.createGridGenerator()
            if not self.__full:
                generator.regular(self.level)
            else:
                generator.full(self.level)

        # if there is a grid specified, add all the missing points
        if self.__grid is not None:
            gs = grid.getStorage()
            copygs = self.__grid.getStorage()

            # insert grid points
            for i in xrange(copygs.size()):
                gp = copygs.get(i)
                # insert grid point
                if not gs.has_key(gp):
                    gs.insert(HashGridIndex(gp))
                    if self.__border == BorderTypes.TRAPEZOIDBOUNDARY:
                        insertTruncatedBorder(grid, gp)
            gs.recalcLeafProperty()

    return grid
def estimateDensitySGDE(trainSamplesUnit,
                        testSamplesUnit=None,
                        testSamplesProb=None,
                        pathResults="/tmp",
                        dist=None,
                        optimization='l2',
                        iteration=0,
                        levels=[1, 2, 3, 4, 5],
                        refNr=0, refPoints=0,
                        nSamples=1000):
    """
    Estimates a sparse grid density for different levels and refinements by
    optimizing over a given quantity. (Duplicate of the sibling
    estimateDensitySGDE, differing only in formatting.)

    NOTE(review): 'levels' is a mutable default argument; it is only read
    here, but confirm no caller mutates it.

    @param trainSamplesUnit: training samples in the unit hyper cube
    @param testSamplesUnit: test samples in the unit hyper cube
    @param testSamplesProb: test samples in the probabilistic space
    @param pathResults: directory where all result files are written
    @param dist: reference distribution for l2/KL measures (optional)
    @param optimization: criterion to pick the best density
                         ('l2', 'kldivergence' or 'crossEntropy')
    @param iteration: iteration index used in file names
    @param levels: list of regular grid levels to try
    @param refNr: number of refinement steps
    @param refPoints: number of refinement points
    @param nSamples: number of samples the estimator should draw
    @return: the best SGDEdist found according to *optimization*
    """
    # configuration template for the external 'dmest' density estimator
    config = """
[general]
method = dmest

[files]
inFileTrain = %s
usingTrain = %s
inFileTest = %s
outFileTest = %s
usingTest = %s

[dmest]
gridFile = %s
lambda = -1 # 0.01
regType=Laplace
refNr = %i
refPoints = %i
writeGridFile = %s
writeAlphaFile = %s
samp_rejectionTrialMax = 5000
samp_numSamples = %i
samp_outFile = %s
printSurfaceFile = %s
"""

    # write the samples to file
    if len(trainSamplesUnit.shape) == 1:
        n, dim = trainSamplesUnit.shape[0], 1
        usingTrainTag = "%i" % dim
    else:
        n, dim = trainSamplesUnit.shape
        usingTrainTag = "1:%i" % dim

    trainSamplesUnitFile = os.path.join(
        pathResults, "samples_%i_%i_train.csv" % (iteration, n))
    np.savetxt(trainSamplesUnitFile, trainSamplesUnit)

    testSamplesUnitFile = ""
    usingTestTag = ""
    if testSamplesUnit is not None:
        testSamplesUnitFile = os.path.join(
            pathResults, "samples_%i_%i_test.csv" % (iteration, n))
        if dim == 1:
            usingTestTag = "%i" % dim
        else:
            usingTestTag = "1:%i" % dim
        np.savetxt(testSamplesUnitFile, testSamplesUnit)

    # collector arrays
    accGridSizes = np.array([])
    accLevels = np.array([])
    accL2error = np.array([])
    accCrossEntropy = np.array([])
    accKLDivergence = np.array([])

    # best estimation
    ans = None
    bestMeasure = 1e20
    bestSetting = None

    for level in levels:
        # define output files
        gridFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.grid" % (iteration, n, level))
        alphaFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.alpha.arff" % (iteration, n, level))
        sampleFile = os.path.join(
            pathResults, "samples_%i_%i_l%i.csv" % (iteration, n, level))

        likelihoodFile = ""
        if testSamplesUnit is not None:
            likelihoodFile = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_likelihood.csv" % (iteration, n, level))

        surfaceFile = ""
        if dim == 2:
            surfaceFile = os.path.join(
                pathResults, "samples_%i_%i_l%i.xyz" % (iteration, n, level))
        gnuplotJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_gnuplot.jpg" % (iteration, n, level))
        sgdeJpegFile = os.path.join(
            pathResults, "samples_%i_%i_l%i_sgde.jpg" % (iteration, n, level))
        sgdePositiveJpegFile = os.path.join(
            pathResults,
            "samples_%i_%i_l%i_sgdePositive.jpg" % (iteration, n, level))
        configFile = os.path.join(pathResults,
                                  "sgde_%i_%i_l%i.cfg" % (iteration, n, level))
        gnuplotConfig = os.path.join(
            pathResults, "sgde_%i_%i_l%i.gnuplot" % (iteration, n, level))

        # generate the grid
        grid = Grid.createLinearBoundaryGrid(dim)
        grid.createGridGenerator().regular(level)

        # skip levels with more grid points than training samples
        if grid.getSize() <= n:
            print " l=%i" % level,
            fd = open(gridFile, "w")
            fd.write(grid.serialize())
            fd.close()

            # write config to file
            fd = open(configFile, "w")
            fd.write(config % (trainSamplesUnitFile,
                               usingTrainTag,
                               testSamplesUnitFile,
                               likelihoodFile,
                               usingTestTag,
                               gridFile,
                               refNr, refPoints,
                               gridFile,
                               alphaFile,
                               nSamples,
                               sampleFile,
                               surfaceFile))
            fd.close()

            # run the estimator on the written configuration
            sgdeDist = SGDEdist.byConfig(configFile)
            grid, alpha = sgdeDist.grid, sgdeDist.alpha
            # -----------------------------------------------------------
            # do some plotting
            if dim == 2:
                # gnuplot
                sgdeDist.gnuplot(gnuplotJpegFile, gnuplotConfig=gnuplotConfig)
                # -----------------------------------------------------------
                # matplotlib
                l2error = np.NAN
                kldivergence = np.NAN
                crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
                if dist is not None:
                    l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                           testSamplesProb)
                    kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                     testSamplesProb)

                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(),
                           doQuadrature(grid, alpha),
                           kldivergence,
                           crossEntropy,
                           l2error))
                fig.savefig(sgdeJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # copy grid and coefficients
            gridFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.grid" % (iteration, n))
            alphaFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.alpha.arff" % (iteration, n))
            sampleFileNew = os.path.join(
                pathResults, "samples_%i_%i_sgde.csv" % (iteration, n))
            copy2(gridFile, gridFileNew)
            copy2(alphaFile, alphaFileNew)
            copy2(sampleFile, sampleFileNew)
            # -----------------------------------------------------------
            # # make it positive and do all over again
            # opPositive = OperationMakePositive(sgdeDist.grid)
            # alg = EstimateDensityAlgorithm(configFile)
            # opPositive.setInterpolationAlgorithm(alg)
            # grid, alpha = opPositive.makePositive(sgdeDist.alpha)

            # scale to unit integrand
            alpha.mult(1. / createOperationQuadrature(grid).doQuadrature(alpha))
            sgdeDist.grid = grid
            sgdeDist.alpha = alpha

            gridFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.grid" % (iteration, n, level))
            alphaFileNew = os.path.join(
                pathResults,
                "samples_%i_%i_l%i_positive.alpha.arff" % (iteration, n, level))
            fd = open(gridFileNew, "w")
            fd.write(Grid.serialize(grid))
            fd.close()
            writeAlphaARFF(alphaFileNew, alpha)
            # -----------------------------------------------------------
            # collect statistics
            accGridSizes = np.append(accGridSizes, grid.getSize())
            accLevels = np.append(accLevels, level)

            l2error = np.NAN
            kldivergence = np.NAN
            crossEntropy = sgdeDist.crossEntropy(testSamplesUnit)
            if dist is not None:
                l2error = dist.l2error(sgdeDist, testSamplesUnit,
                                       testSamplesProb)
                kldivergence = dist.klDivergence(sgdeDist, testSamplesUnit,
                                                 testSamplesProb)

            accL2error = np.append(accL2error, l2error)
            accCrossEntropy = np.append(accCrossEntropy, crossEntropy)
            accKLDivergence = np.append(accKLDivergence, kldivergence)

            if dim == 2:
                # -----------------------------------------------------------
                # do some plotting
                fig = plt.figure()
                plotSG2d(grid, alpha)
                plt.title("N=%i: vol=%g, kl=%g, log=%g, l2error=%g" %
                          (grid.getSize(),
                           doQuadrature(grid, alpha),
                           kldivergence,
                           crossEntropy,
                           l2error))
                fig.savefig(sgdePositiveJpegFile)
                plt.close(fig)
            # -----------------------------------------------------------
            # select the best density available based on the given criterion
            if optimization == 'crossEntropy':
                measure = crossEntropy
            elif optimization == 'kldivergence':
                measure = kldivergence
            elif optimization == 'l2':
                measure = l2error
            else:
                raise AttributeError(
                    'optimization "%s" is not known for density estimation' %
                    optimization)

            isBest = measure < bestMeasure
            if isBest:
                bestMeasure = measure

            if ans is None or isBest:
                ans = sgdeDist
                bestSetting = {
                    'level': level,
                    'gridSize': grid.getSize(),
                    'l2error': l2error,
                    'KLDivergence': kldivergence,
                    'crossEntropy': crossEntropy
                }
                # -----------------------------------------------------------
                # copy grid and coefficients
                gridFileNew = os.path.join(
                    pathResults, "samples_%i_%i.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults, "samples_%i_%i.alpha.arff" % (iteration, n))
                sampleFileNew = os.path.join(
                    pathResults, "samples_%i_%i.csv" % (iteration, n))
                copy2(gridFile, gridFileNew)
                copy2(alphaFile, alphaFileNew)
                copy2(sampleFile, sampleFileNew)

                gridFileNew = os.path.join(
                    pathResults, "samples_%i_%i_positive.grid" % (iteration, n))
                alphaFileNew = os.path.join(
                    pathResults,
                    "samples_%i_%i_positive.alpha.arff" % (iteration, n))
                fd = open(gridFileNew, "w")
                fd.write(Grid.serialize(ans.grid))
                fd.close()
                writeAlphaARFF(alphaFileNew, ans.alpha)
            # -----------------------------------------------------------
            print ": %s = %g <= %g" % (optimization, measure, bestMeasure)
    print
    # -----------------------------------------------------------
    # write results to file
    statsfilename = os.path.join(
        pathResults, "sg_sgde_%i_%i_all.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(
            np.vstack(([n] * len(accGridSizes),
                       accGridSizes,
                       accLevels,
                       accL2error,
                       accKLDivergence,
                       accCrossEntropy)).transpose()),
        'names': [
            'sampleSize', 'gridSize', 'level', 'l2error', 'KLDivergence',
            'crossEntropy'
        ]
    })
    # -----------------------------------------------------------
    statsfilename = os.path.join(pathResults,
                                 "sg_sgde_%i_%i.stats.arff" % (iteration, n))
    writeDataARFF({
        'filename': statsfilename,
        'data': DataMatrix(
            np.vstack(([n],
                       bestSetting['gridSize'],
                       bestSetting['level'],
                       bestSetting['l2error'],
                       bestSetting['KLDivergence'],
                       bestSetting['crossEntropy'])).transpose()),
        'names': [
            'sampleSize', 'gridSize', 'level', 'l2error', 'KLDivergence',
            'crossEntropy'
        ]
    })
    # -----------------------------------------------------------
    return ans