Exemplo n.º 1
0
def compareBTMatrices(m1, m2):
    """
    Print the sorted row sums and the sorted column sums of two matrices
    next to each other for visual comparison.

    One tuple (value from m2, value from m1) is printed per row and then
    per column; nothing is asserted and nothing is returned.
    @param m1: matrix to inspect, read via getSize(), getDim(), getRow()
               and getColumn()
    @param m2: reference matrix, read via getRow() and getColumn()
    """
    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    numRows = m1.getSize()
    numCols = m1.getDim()

    # row sums: the shared buffer holds one entry per column
    rowBuf = DataVector(numCols)
    own = sortedSums(m1.getRow, numRows, rowBuf)
    ref = sortedSums(m2.getRow, numRows, rowBuf)
    for pair in zip(ref, own):
        print(pair)

    # column sums: the shared buffer holds one entry per row
    colBuf = DataVector(numRows)
    own = sortedSums(m1.getColumn, numCols, colBuf)
    ref = sortedSums(m2.getColumn, numCols, colBuf)
    for pair in zip(ref, own):
        print(pair)
Exemplo n.º 2
0
def compareBBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two square matrices agree in their sorted diagonal entries,
    sorted row sums and sorted column sums.

    Values are compared with assertAlmostEqual, using 5 decimal places if
    cvar.USING_DOUBLE_PRECISION is set and 3 otherwise.
    @param testCaseClass: object providing assertEqual() and assertAlmostEqual()
    @param m1: matrix under test
    @param m2: reference matrix
    """
    from pysgpp import DataVector, cvar

    if cvar.USING_DOUBLE_PRECISION:
        places = 5
    else:
        places = 3

    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assertAllClose(actual, expected, template):
        # Compare two equally long, already sorted lists entry by entry.
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, places=places,
                                            msg=template % (got, want))

    # dimensions: m1 has to be square and of the same size as m2
    testCaseClass.assertEqual(m1.getNrows(), m1.getNcols())
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getSize(), m2.getSize())

    n = m1.getNrows()

    # diagonal entries
    diagOwn = sorted([m1.get(k, k) for k in range(n)])
    diagRef = sorted([m2.get(k, k) for k in range(n)])
    assertAllClose(diagOwn, diagRef, "Diagonal %f != %f")

    # row sums
    rowBuf = DataVector(n)
    rowOwn = sortedSums(m1.getRow, n, rowBuf)
    rowRef = sortedSums(m2.getRow, n, rowBuf)
    assertAllClose(rowOwn, rowRef, "Row sum %f != %f")

    # column sums
    colBuf = DataVector(n)
    colOwn = sortedSums(m1.getColumn, n, colBuf)
    colRef = sortedSums(m2.getColumn, n, colBuf)
    assertAllClose(colOwn, colRef, "Col sum %f != %f")
Exemplo n.º 3
0
def compareBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two (possibly non-square) matrices have the same shape and
    agree in their sorted row sums and sorted column sums.

    Sums are compared with assertAlmostEqual to 5 decimal places.
    @param testCaseClass: object providing assertEqual() and assertAlmostEqual()
    @param m1: matrix under test
    @param m2: reference matrix
    """
    from pysgpp import DataVector

    places = 5

    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assertAllClose(actual, expected, template):
        # Compare two equally long, already sorted lists entry by entry.
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, places=places,
                                            msg=template % (got, want))

    # both matrices must have the same shape
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getNcols(), m2.getNcols())

    numRows = m1.getNrows()
    numCols = m1.getNcols()

    # row sums: the shared buffer holds one entry per column
    rowBuf = DataVector(numCols)
    rowOwn = sortedSums(m1.getRow, numRows, rowBuf)
    rowRef = sortedSums(m2.getRow, numRows, rowBuf)
    assertAllClose(rowOwn, rowRef, "Row sum %f != %f")

    # column sums: the shared buffer holds one entry per row
    colBuf = DataVector(numRows)
    colOwn = sortedSums(m1.getColumn, numCols, colBuf)
    colRef = sortedSums(m2.getColumn, numCols, colBuf)
    assertAllClose(colOwn, colRef, "Col sum %f != %f")
Exemplo n.º 4
0
def compareStiffnessMatrices(testCaseClass, m1, m2):
    """
    Assert that two square matrices agree in their sorted diagonal entries,
    sorted row sums and sorted column sums.

    The diagonal is read through flat indexing (entry k*n + k); all values
    are compared with assertAlmostEqual at its default precision.
    @param testCaseClass: object providing assertEqual() and assertAlmostEqual()
    @param m1: matrix under test
    @param m2: reference matrix
    """
    from pysgpp import DataVector

    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assertAllClose(actual, expected, template):
        # Compare two equally long, already sorted lists entry by entry.
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want,
                                            msg=template % (got, want))

    # dimensions: m1 has to be square and shaped like m2
    testCaseClass.assertEqual(m1.getSize(), m1.getDim())
    testCaseClass.assertEqual(m1.getSize(), m2.getSize())
    testCaseClass.assertEqual(m1.getDim(), m2.getDim())

    n = m1.getSize()

    # diagonal entries
    diagOwn = sorted([m1[k * n + k] for k in range(n)])
    diagRef = sorted([m2[k * n + k] for k in range(n)])
    assertAllClose(diagOwn, diagRef, "Diagonal %f != %f")

    # row sums
    rowBuf = DataVector(n)
    rowOwn = sortedSums(m1.getRow, n, rowBuf)
    rowRef = sortedSums(m2.getRow, n, rowBuf)
    assertAllClose(rowOwn, rowRef, "Row sum %f != %f")

    # column sums
    colBuf = DataVector(n)
    colOwn = sortedSums(m1.getColumn, n, colBuf)
    colRef = sortedSums(m2.getColumn, n, colBuf)
    assertAllClose(colOwn, colRef, "Col sum %f != %f")
Exemplo n.º 5
0
def generateLaplaceMatrix(factory, level, verbose=False):
    """
    Assemble the matrix of the grid's Laplace operator column by column.

    A regular grid of the given level is generated on factory; the operator
    returned by factory.createOperationLaplace() is then applied to every
    unit vector and each result is stored as one column of the matrix.
    @param factory: grid object providing getStorage(), createGridGenerator()
                    and createOperationLaplace()
    @param level: level handed to the generator's regular()
    @param verbose: if True, print every computed column followed by its sum
    @return: DataVector(size, size) holding the assembled matrix
    """
    from pysgpp import DataVector
    storage = factory.getStorage()

    gen = factory.createGridGenerator()
    gen.regular(level)

    laplace = factory.createOperationLaplace()

    # the grid is not modified below, so its size is read only once
    size = storage.size()

    # unit vector and result vector, reused for every column
    alpha = DataVector(size)
    erg = DataVector(size)

    # create stiffness matrix
    m = DataVector(size, size)
    m.setAll(0)
    for i in range(size):
        # apply unit vectors
        alpha.setAll(0)
        alpha[i] = 1
        laplace.mult(alpha, erg)
        if verbose:
            # single pre-formatted string: same output under Python 2 and 3
            print("%s %s" % (erg, erg.sum()))
        m.setColumn(i, erg)

    return m
    def calc_indicator_value(self, index):
        """
        Compute the refinement indicator of a single grid point.

        The numerator is the squared entry, at the grid point's sequence
        number, of multEval.multTranspose applied to self.errors. The
        denominator is the sum of squares of multEval.mult applied to the
        unit vector of that grid point.
        @param index: grid point, mapped to its sequence number by the
                      grid storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        # numerator
        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)
        num = tmp[seq] ** 2

        # denominator: build the unit vector of the grid point; it is zeroed
        # explicitly (as naive_calc_single does) instead of relying on the
        # initial contents of a freshly constructed DataVector
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)
        col.sqr()
        denom = col.sum()

        if denom == 0:
            print("Denominator is zero")
            return 0
        return num / denom
    def calc_indicator_value(self, index):
        """
        Compute the refinement indicator of a single grid point.

        The numerator is the squared entry, at the grid point's sequence
        number, of multEval.multTranspose applied to self.errors. The
        denominator is the sum of squares of multEval.mult applied to the
        unit vector of that grid point.
        @param index: grid point, mapped to its sequence number by the
                      grid storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        # numerator
        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)
        num = tmp[seq] ** 2

        # denominator: build the unit vector of the grid point; it is zeroed
        # explicitly (as naive_calc_single does) instead of relying on the
        # initial contents of a freshly constructed DataVector
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)
        col.sqr()
        denom = col.sum()

        if denom == 0:
            # function-call form prints the same text under Python 2 and 3
            print("Denominator is zero")
            return 0
        return num / denom
Exemplo n.º 8
0
def compareBBTMatrices(m1, m2):
    """
    Print the sorted diagonal entries, sorted row sums and sorted column
    sums of two square matrices next to each other for visual comparison.

    One tuple (value from m2, value from m1) is printed per entry; nothing
    is asserted and nothing is returned.
    @param m1: matrix to inspect, read via getSize(), flat indexing,
               getRow() and getColumn()
    @param m2: reference matrix, read via flat indexing, getRow() and
               getColumn()
    """
    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def show(expected, actual):
        # Print reference and actual value pairwise.
        for pair in zip(expected, actual):
            print(pair)

    n = m1.getSize()

    # diagonal entries (flat index k*n + k)
    diagOwn = sorted([m1[k * n + k] for k in range(n)])
    diagRef = sorted([m2[k * n + k] for k in range(n)])
    show(diagRef, diagOwn)

    # row sums
    rowBuf = DataVector(n)
    rowOwn = sortedSums(m1.getRow, n, rowBuf)
    rowRef = sortedSums(m2.getRow, n, rowBuf)
    show(rowRef, rowOwn)

    # column sums
    colBuf = DataVector(n)
    colOwn = sortedSums(m1.getColumn, n, colBuf)
    colRef = sortedSums(m2.getColumn, n, colBuf)
    show(colRef, colOwn)
Exemplo n.º 9
0
def compareBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two (possibly non-square) matrices have the same shape and
    agree in their sorted row sums and sorted column sums.

    Sums are compared with assertAlmostEqual, passing 5 as its third
    positional argument.
    @param testCaseClass: object providing assertEqual() and assertAlmostEqual()
    @param m1: matrix under test
    @param m2: reference matrix
    """
    from pysgpp import DataVector

    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assertAllClose(actual, expected, template):
        # Compare two equally long, already sorted lists entry by entry.
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, 5,
                                            msg=template % (got, want))

    # both matrices must have the same shape
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getNcols(), m2.getNcols())

    numRows = m1.getNrows()
    numCols = m1.getNcols()

    # row sums: the shared buffer holds one entry per column
    rowBuf = DataVector(numCols)
    rowOwn = sortedSums(m1.getRow, numRows, rowBuf)
    rowRef = sortedSums(m2.getRow, numRows, rowBuf)
    assertAllClose(rowOwn, rowRef, "Row sum %f != %f")

    # column sums: the shared buffer holds one entry per row
    colBuf = DataVector(numRows)
    colOwn = sortedSums(m1.getColumn, numCols, colBuf)
    colRef = sortedSums(m2.getColumn, numCols, colBuf)
    assertAllClose(colOwn, colRef, "Col sum %f != %f")
Exemplo n.º 10
0
    def naive_calc_single(self, index):
        """
        Compute the refinement indicator of a single grid point and print
        the intermediate results.

        The numerator is the squared entry, at the grid point's sequence
        number, of multEval.multTranspose applied to self.errors. The
        denominator is the sum of squares of multEval.mult applied to the
        unit vector of that grid point.
        @param index: grid point, mapped to its sequence number by the
                      grid storage
        @return: numerator / denominator, or 0 if the denominator is zero
        """
        numData = self.trainData.getNrows()
        numCoeff = self.grid.getSize()
        seq = self.grid.getStorage().seq(index)

        # numerator
        tmp = DataVector(numCoeff)
        self.multEval.multTranspose(self.errors, tmp)
        num = tmp[seq] ** 2

        # denominator: unit vector of the grid point
        alpha = DataVector(numCoeff)
        alpha.setAll(0.0)
        alpha[seq] = 1.0

        col = DataVector(numData)
        self.multEval.mult(alpha, col)

        # debug output; function-call form with a single argument prints
        # the same text under Python 2 and 3
        print(col)

        col.sqr()
        denom = col.sum()

        print(num)
        print(denom)

        if denom == 0:
            print("Denominator is zero")
            return 0
        return num / denom
Exemplo n.º 11
0
class Regressor(Learner):
    # Learner subclass holding the regression-specific steps: error
    # evaluation, bookkeeping of training statistics and grid refinement.

    ## Errors per basis function
    errors = None

    ## Error vector (entries are squared by evalError)
    error = None

    ## Constructor
    def __init__(self):
        super(Regressor, self).__init__()

    ## Calculate L2-norm of error
    # @return: square root of the summed entries of the last error vector
    def getL2NormError(self):
        return sqrt(self.error.sum())

    ## Calculate max error
    # @return: square root of the largest entry of the last error vector
    def getMaxError(self):
        return sqrt(self.error.max())

    ## Calculate min error
    # @return: square root of the smallest entry of the last error vector
    def getMinError(self):
        return sqrt(self.error.min())

    ## Evaluate regression MSE
    #
    # As a side effect the squared residuals are stored in self.error and
    # the error per basis function in self.errors.
    #
    # @param data: DataContainer dataset
    # @param alpha: DataVector alpha-vector
    # @return: mean square error, 0 for an empty data set
    def evalError(self, data, alpha):
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        opB = self.specification.getBOperator(data.getName())

        self.error = DataVector(size)
        opB.mult(alpha, self.error)
        self.error.sub(data.getValues())  # error vector
        self.error.sqr()  # entries squared
        mse = self.error.sum() / size  # MSE

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.specification.getBOperator(data.getName()).multTranspose(
            self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    ## Update different statistics about training progress
    # @param alpha: DataVector alpha-vector
    # @param trainSubset: DataContainer with training data
    # @param testSubset: DataContainer with validation data, default value: None
    def updateResults(self, alpha, trainSubset, testSubset=None):
        self.knowledge.update(alpha)
        # eval error for training data and append it to other in this iteration
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        # eval error for test data and append it to other in this iteration
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            # average over the test entries themselves; the two accuracy
            # lists differ in length if an earlier call had no test subset
            self.testingOverall.append(
                sum(self.testAccuracy) / float(len(self.testAccuracy)))

        self.trainingOverall.append(
            sum(self.trainAccuracy) / float(len(self.trainAccuracy)))

        self.numberPoints.append(self.grid.getSize())

    ## Refines grid with the number of points as specified in corresponding
    # TrainingSpecification object
    def refineGrid(self):
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)

        pointsNum = self.specification.getNumOfPointsToRefine(
            self.grid.getGenerator().getNumberOfRefinablePoints())
        self.grid.getGenerator().refine(
            SurplusRefinementFunctor(self.errors, pointsNum,
                                     self.specification.getAdaptThreshold()))
Exemplo n.º 12
0
class Regressor(Learner):
    """
    Subclass of Learner, responsible for regression.
    The methods specific for regression are implemented here.
    """
    def __init__(self):
        """
        Constructor
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # endlessly as soon as Regressor is subclassed.
        super(Regressor, self).__init__()
        # Errors per basis function
        self.errors = None
        # Error vector
        self.error = None

    def __getattr__(self, attr):
        """
        Fallback for attributes that are not found on the Regressor itself:
        the lookup is forwarded to the training specification.
        @param attr: string attribute or method name
        @return: attribute of the same name taken from self.specification
        @raise AttributeError: if the specification itself is not available
        """
        # Without this guard a missing `specification` attribute would make
        # the lookup below re-enter __getattr__ without end.
        if attr == 'specification':
            raise AttributeError(attr)
        return getattr(self.specification, attr)

    def learnDataWithTest(self, dataset=None):
        """
        Learn data from training data set and use validation data set to
        prevent overfitting
        @param dataset: DataContainer object with data sets, default value
                        None (the learner's own data container is used)
        @return: DataVector of alpha
        """
        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_TESTING_STARTED)
        B = createOperationMultipleEval(
            self.grid,
            self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY))
        self.specification.setBOperator(B)

        if dataset is None:
            dataset = self.dataContainer

        # learning step
        trainSubset = dataset.getTrainDataset()
        testSubset = dataset.getTestDataset()

        while True:  # repeat until policy says "stop"
            self.notifyEventControllers(
                LearnerEvents.LEARNING_WITH_TESTING_STEP_STARTED)

            self.alpha = self.doLearningIteration(trainSubset)

            # calculate avg. error for training and test data and avg. for refine alpha
            self.updateResults(self.alpha, trainSubset, testSubset)

            self.notifyEventControllers(
                LearnerEvents.LEARNING_WITH_TESTING_STEP_COMPLETE)

            self.iteration += 1

            if self.stopPolicy.isTrainingComplete(self):
                break

            # refine grid
            self.refineGrid()

        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_TESTING_COMPLETE)
        return self.alpha

    def learnData(self):
        """
        Simple data learning
        @return: DataVector of alpha
        """
        self.notifyEventControllers(LearnerEvents.LEARNING_STARTED)
        self.specification.setBOperator(
            createOperationMultipleEval(
                self.grid,
                self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY)))
        print(self.getL())
        while True:  # repeat until policy says "stop"
            print(
                "Learning %i/%i" %
                (self.iteration, self.stopPolicy.getAdaptiveIterationLimit()))
            self.notifyEventControllers(LearnerEvents.LEARNING_STEP_STARTED)
            # learning step
            self.alpha = self.doLearningIteration(self.dataContainer)

            # calculate avg. error for training and test data and avg. for refine alpha
            self.updateResults(self.alpha, self.dataContainer)
            self.notifyEventControllers(LearnerEvents.LEARNING_STEP_COMPLETE)
            self.iteration += 1
            if self.stopPolicy.isTrainingComplete(self):
                break
            # refine grid
            self.refineGrid()

        self.notifyEventControllers(LearnerEvents.LEARNING_COMPLETE)
        return self.alpha

    def learnDataWithFolding(self):
        """
        Learn data with cross-fold validation
        @return: list of DataVector alpha in different folds
        """
        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_FOLDING_STARTED)
        self.specification.setBOperator(
            createOperationMultipleEval(
                self.grid,
                self.dataContainer.getPoints(DataContainer.TRAIN_CATEGORY)))
        # update folding
        self.updateFoldingPolicy()
        alphas = [self.learnDataWithTest(dataset)
                  for dataset in self.foldingPolicy]

        self.notifyEventControllers(
            LearnerEvents.LEARNING_WITH_FOLDING_COMPLETE)
        return alphas

    def doLearningIteration(self, set):
        """
        Perform one learning step
        @param set: DataContainer training data set (the name shadows the
                    builtin but is kept for callers passing it by keyword)
        @return: DataVector alpha vector
        """
        # initialize values
        self.linearSystem = DMSystemMatrix(self.grid, set.getPoints(),
                                           self.specification.getCOperator(),
                                           self.specification.getL())
        size = self.grid.getSize()
        if self.solver.getReuse() and self.alpha is not None:
            # Reuse data from old alpha vector increasing its dimension
            alpha = DataVector(self.alpha)
            alpha.resize(size)
        else:
            # Use new alpha vector
            alpha = DataVector(size)
            alpha.setAll(0.0)
        b = DataVector(size)
        self.linearSystem.generateb(set.getValues(), b)
        # calculates alphas
        self.solver.solve(self.linearSystem, alpha, b, self.solver.getReuse(),
                          False, self.solver.getThreshold())

        return alpha

    def getL2NormError(self):
        """
        calculate L2-norm of error
        @return: last L2-norm of error
        """
        return np.sqrt(self.error.sum())

    def getMaxError(self):
        """
        calculate max error
        @return: max error
        """
        return np.sqrt(self.error.max())

    def getMinError(self):
        """
        calculate min error
        @return: min error
        """
        return np.sqrt(self.error.min())

    def evalError(self, data, alpha):
        """
        Evaluate regression MSE.
        As a side effect the squared residuals are stored in self.error and
        the error per basis function in self.errors.
        @param data: DataContainer data set
        @param alpha: DataVector alpha-vector
        @return: mean square error, 0 for an empty data set
        """
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.getBOperator().mult(alpha, self.error)
        # error vector
        self.error.sub(data.getValues())
        # entries squared
        self.error.sqr()
        # MSE
        mse = self.error.sum() / size

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.getBOperator().multTranspose(self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    def updateResults(self, alpha, trainSubset, testSubset=None):
        """
        Update different statistics about training progress
        @param alpha: DataVector alpha-vector
        @param trainSubset: DataContainer with training data
        @param testSubset: DataContainer with validation data
        """
        # self.knowledge.update(alpha)
        # eval error for training data and append it to other in this iteration
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        # eval error for test data and append it to other in this iteration
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            # average over the test entries themselves; the two accuracy
            # lists differ in length once learnData() (no test subset) and
            # learnDataWithTest() are used on the same learner
            self.testingOverall.append(
                sum(self.testAccuracy) / float(len(self.testAccuracy)))

        self.trainingOverall.append(
            sum(self.trainAccuracy) / float(len(self.trainAccuracy)))

        self.numberPoints.append(self.grid.getSize())

    def refineGrid(self):
        """
        Refines grid with the number of points as specified in corresponding
        TrainingSpecification object
        """
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)
        refinableNum = self.grid.getGenerator().getNumberOfRefinablePoints()
        pointsNum = self.getNumOfPointsToRefine(refinableNum)
        functor = SurplusRefinementFunctor(self.errors, pointsNum,
                                           self.getAdaptThreshold())
        self.grid.getGenerator().refine(functor)
Exemplo n.º 13
0
class TestDataVector(unittest.TestCase):

    ## 
    # Set up, create random DataVector and corresponding Python data structures.
    # @test DataVector::get(), DataVector::set()
    def setUp(self):
        from pysgpp import DataVector
        import random

        ## number of rows
        self.nrows = 5
        ## number of columns
        self.ncols = 4
        ## number of entries
        self.N = self.nrows*self.ncols
        ## random list of lists
        self.l_rand = [[2*(random.random()-0.5) for j in xrange(self.ncols)] for i in xrange(self.nrows)]
        ## same as l_rand, but flattened
        self.l_rand_total = []
        for li in self.l_rand:
            self.l_rand_total.extend(li)
#        ## Data Vector, corresponding to l_rand
#        self.d_rand = DataVector(self.nrows,self.ncols)
#        for i in xrange(self.N):
#            self.d_rand[i] = self.l_rand_total[i]
#
#        for i in xrange(self.N):
#            self.assertEqual(self.d_rand[i], self.l_rand_total[i])
        ## Data Vector, corresponding to l_rand
        self.d_rand = DataVector(self.N)
        for i in xrange(self.N):
            self.d_rand[i] = self.l_rand_total[i]

        for i in xrange(self.N):
            self.assertEqual(self.d_rand[i], self.l_rand_total[i])

    ##
    # Constructors4.
    # @test DataVector::DataVector(size_t size), DataVector::DataVector(size_t size, size_t dim), DataVector::DataVector(DataVectorDefinition &DataVectorDef), DataVector::getSize(), DataVector::getDim(), DataVector::getSize()
    # @todo (pflueged) DataVector::DataVector(double *input, size_t size, size_t dim)
    def testConstructor(self):
        from pysgpp import DataVector
        
        d = DataVector(2)
        self.assertEqual(len(d), 2) # getSize()
        
#        d = DataVector(2,3)
#        self.assertEqual(d.getSize(), 2)
#        self.assertEqual(d.getDim(), 3)
#        self.assertEqual(len(d), 2*3) # getSize()
#
#        d2 = DataVector(self.d_rand)
#        for i in xrange(self.N):
#            self.assertEqual(d2[i], self.d_rand[i])
#        self.assertEqual(d2.getSize(), self.nrows)
#        self.assertEqual(d2.getDim(), self.ncols)
#        self.assertEqual(len(d2), self.N)
#        d2[self.ncols] = -4.0
#        self.assertNotEqual(d2[self.ncols], self.d_rand[self.ncols])

    ##
    # Min, Max operations.
    # @test DataVector::min(int d), DataVector::max(int d), DataVector::minmax(int d, double *min, double *max), DataVector::min(), DataVector::max()
    def testMinMax(self):

#        # test dimension-dependent min, max
#        for j in xrange(self.ncols):
#            minj = min([self.l_rand[i][j] for i in xrange(self.nrows)])
#            maxj = max([self.l_rand[i][j] for i in xrange(self.nrows)])
#            self.assertEqual(self.d_rand.min(j), minj)
#            self.assertEqual(self.d_rand.max(j), maxj)
#            mi, ma = self.d_rand.minmax(j)
#            self.assertEqual(mi, minj)
#            self.assertEqual(ma, maxj)

        # test global min, max
        self.assertEqual(self.d_rand.min(), min(self.l_rand_total))
        self.assertEqual(self.d_rand.max(), max(self.l_rand_total))
   

    ##
    # Operations on DataVectors.
    # @test DataVector::sum(), DataVector::sqr(), DataVector::abs(), DataVector::componentwise_mult(), DataVector::componentwise_div()
    def testOps(self):
        from pysgpp import DataVector
        # sum
        self.assertAlmostEqual(self.d_rand.sum(), sum(self.l_rand_total))

        # sqr
        d = DataVector(self.d_rand)
        d.sqr()
        for i in xrange(self.N):
            self.assertEqual(self.d_rand[i]**2, d[i])

        # abs
        d = DataVector(self.d_rand)
        d.abs()
        for i in xrange(self.N):
            self.assertEqual(abs(self.d_rand[i]), d[i])

        # componentwise_mult
        d = DataVector(self.d_rand)
#	d2 = DataVector(self.nrows, self.ncols)
	d2 = DataVector(self.N)
        for i in xrange(self.N):
            d2[i] = i
	d.componentwise_mult(d2)
        for i in xrange(self.N):
            self.assertEqual(self.d_rand[i]*i, d[i])

        # componentwise_div
        d = DataVector(self.d_rand)
        for i in xrange(self.N):
            d2[i] = i+1
	d.componentwise_div(d2)
        for i in xrange(self.N):
            self.assertEqual(self.d_rand[i]/(i+1), d[i])

    ##
    # Vector-Operations
    # @test DataVector::dotProduct(DataVector &vec)
    def testDotProduct(self):
        from pysgpp import DataVector
        
        x = 0
        
        d = DataVector(3)
        for i in xrange(len(d)):
            d[i] = i + 1
            x += d[i] * d[i]
            
        self.assertEqual(d.dotProduct(d), x)
Exemplo n.º 14
0
def compareBBTMatrices(testCaseClass, m1, m2):
    """
    Assert that two square matrices agree in their sorted diagonal entries,
    sorted row sums and sorted column sums.

    Values are compared with assertAlmostEqual, using 5 decimal places if
    cvar.USING_DOUBLE_PRECISION is set and 3 otherwise.
    @param testCaseClass: object providing assertEqual() and assertAlmostEqual()
    @param m1: matrix under test
    @param m2: reference matrix
    """
    from pysgpp import DataVector, cvar

    if cvar.USING_DOUBLE_PRECISION:
        places = 5
    else:
        places = 3

    def sortedSums(fetch, count, buf):
        # Load slices 0..count-1 into buf through fetch, return their sums ascending.
        sums = []
        for idx in range(count):
            fetch(idx, buf)
            sums.append(buf.sum())
        return sorted(sums)

    def assertAllClose(actual, expected, template):
        # Compare two equally long, already sorted lists entry by entry.
        for got, want in zip(actual, expected):
            testCaseClass.assertAlmostEqual(got, want, places=places,
                                            msg=template % (got, want))

    # dimensions: m1 has to be square and of the same size as m2
    testCaseClass.assertEqual(m1.getNrows(), m1.getNcols())
    testCaseClass.assertEqual(m1.getNrows(), m2.getNrows())
    testCaseClass.assertEqual(m1.getSize(), m2.getSize())

    n = m1.getNrows()

    # diagonal entries
    diagOwn = sorted([m1.get(k, k) for k in range(n)])
    diagRef = sorted([m2.get(k, k) for k in range(n)])
    assertAllClose(diagOwn, diagRef, "Diagonal %f != %f")

    # row sums
    rowBuf = DataVector(n)
    rowOwn = sortedSums(m1.getRow, n, rowBuf)
    rowRef = sortedSums(m2.getRow, n, rowBuf)
    assertAllClose(rowOwn, rowRef, "Row sum %f != %f")

    # column sums
    colBuf = DataVector(n)
    colOwn = sortedSums(m1.getColumn, n, colBuf)
    colRef = sortedSums(m2.getColumn, n, colBuf)
    assertAllClose(colOwn, colRef, "Col sum %f != %f")
Exemplo n.º 15
0
class Regressor(Learner):
    # Learner subclass holding the regression-specific steps: error
    # evaluation, bookkeeping of training statistics and grid refinement.

    ## Errors per basis function
    errors = None

    ## Error vector (entries are squared by evalError)
    error = None

    ## Constructor
    def __init__(self):
        super(Regressor, self).__init__()

    ## Calculate L2-norm of error
    # @return: square root of the summed entries of the last error vector
    def getL2NormError(self):
        return sqrt(self.error.sum())

    ## Calculate max error
    # @return: square root of the largest entry of the last error vector
    def getMaxError(self):
        return sqrt(self.error.max())

    ## Calculate min error
    # @return: square root of the smallest entry of the last error vector
    def getMinError(self):
        return sqrt(self.error.min())

    ## Evaluate regression MSE
    #
    # As a side effect the squared residuals are stored in self.error and
    # the error per basis function in self.errors.
    #
    # @param data: DataContainer dataset
    # @param alpha: DataVector alpha-vector
    # @return: mean square error, 0 for an empty data set
    def evalError(self, data, alpha):
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        self.error = DataVector(size)
        self.specification.getBOperator(data.getName()).mult(alpha, self.error)
        self.error.sub(data.getValues())  # error vector
        self.error.sqr()  # entries squared
        mse = self.error.sum() / size  # MSE

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.specification.getBOperator(data.getName()).multTranspose(
            self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    ## Update different statistics about training progress
    # @param alpha: DataVector alpha-vector
    # @param trainSubset: DataContainer with training data
    # @param testSubset: DataContainer with validation data, default value: None
    def updateResults(self, alpha, trainSubset, testSubset=None):
        self.knowledge.update(alpha)
        # eval error for training data and append it to other in this iteration
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        # eval error for test data and append it to other in this iteration
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            # average over the test entries themselves; the two accuracy
            # lists differ in length if an earlier call had no test subset
            self.testingOverall.append(
                sum(self.testAccuracy) / float(len(self.testAccuracy)))

        self.trainingOverall.append(
            sum(self.trainAccuracy) / float(len(self.trainAccuracy)))

        self.numberPoints.append(self.grid.getSize())

    ## Refines grid with the number of points as specified in corresponding
    # TrainingSpecification object
    def refineGrid(self):
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)

        pointsNum = self.specification.getNumOfPointsToRefine(
            self.grid.createGridGenerator().getNumberOfRefinablePoints())
        self.grid.createGridGenerator().refine(
            SurplusRefinementFunctor(self.errors, pointsNum,
                                     self.specification.getAdaptThreshold()))
Exemplo n.º 16
0
class Regressor(Learner):
    """
    Subclass of Learner, responsible for regression.
    The methods specific for regression are implemented here.

    evalError() stores the squared residuals of its last evaluation in
    self.error and a per-basis-function error indicator in self.errors;
    the get*Error() methods and refineGrid() read those attributes.
    Attributes that are not found on the instance are looked up on
    self.specification (see __getattr__).
    """

    def __init__(self):
        """
        Constructor
        """
        # Name the class explicitly: super(self.__class__, self) resolves to
        # the *runtime* class and therefore recurses without end as soon as
        # Regressor is subclassed.
        super(Regressor, self).__init__()
        # Errors per basis function
        self.errors = None
        # Error vector
        self.error = None

    def __getattr__(self, attr):
        """
        Fallback lookup: anything that is not an attribute of this object
        is fetched from the specification instead.
        @param attr: string attribute (usually method) name
        @return: attribute of the same name in the specification
        @raise AttributeError: if the specification is not set or does not
            provide the attribute
        """
        # __getattr__ only runs after the normal lookup failed. If the
        # missing name is 'specification' itself, delegating to
        # self.specification would re-enter this method forever.
        if attr == 'specification':
            raise AttributeError(attr)
        return getattr(self.specification, attr)

    def getL2NormError(self):
        """
        calculate L2-norm of error
        Requires a previous evalError() call on a non-empty data set, since
        it reads the squared residuals stored in self.error.
        @return: last L2-norm of error
        """
        return np.sqrt(self.error.sum())

    def getMaxError(self):
        """
        calculate max error
        Takes the square root of the largest squared residual stored by the
        last evalError() call.
        @return: max error
        """
        return np.sqrt(self.error.max())

    def getMinError(self):
        """
        calculate min error
        Takes the square root of the smallest squared residual stored by the
        last evalError() call.
        @return: min error
        """
        return np.sqrt(self.error.min())

    def evalError(self, data, alpha):
        """
        Evaluate regression MSE
        Side effect: overwrites self.error (squared residuals) and
        self.errors (per-basis-function indicator). For an empty data set
        both are left untouched and 0 is returned.
        @param data: DataContainer data set
        @param alpha: DataVector alpha-vector
        @return: mean square error
        """
        size = data.getPoints().getNrows()
        if size == 0:
            return 0

        # getBOperator is not defined in this class; it is resolved through
        # Learner or the __getattr__ fallback to the specification.
        self.error = DataVector(size)
        self.getBOperator().mult(alpha, self.error)
        # error vector
        self.error.sub(data.getValues())
        # entries squared
        self.error.sqr()
        # MSE
        mse = self.error.sum() / size

        # calculate error per basis function
        self.errors = DataVector(len(alpha))
        self.getBOperator().multTranspose(self.error, self.errors)
        self.errors.componentwise_mult(alpha)

        return mse

    def updateResults(self, alpha, trainSubset, testSubset=None):
        """
        Update different statistics about training progress
        @param alpha: DataVector alpha-vector
        @param trainSubset: DataContainer with training data
        @param testSubset: DataContainer with validation data
        """
        # NOTE: self.knowledge.update(alpha) is disabled in this class.
        # eval Error for training data and append it to other in this iteration
        self.trainAccuracy.append(self.evalError(trainSubset, alpha))

        # eval error for test data and append it to other in this iteration
        # NOTE(review): evalError overwrites self.error / self.errors, so
        # when a test subset is given they describe the test data afterwards.
        if testSubset is not None:
            self.testAccuracy.append(self.evalError(testSubset, alpha))
            # Average over the test evaluations actually recorded; dividing
            # by the number of training evaluations is wrong whenever some
            # calls were made without a test subset.
            self.testingOverall.append(
                sum(self.testAccuracy) / float(len(self.testAccuracy)))

        self.trainingOverall.append(
            sum(self.trainAccuracy) / float(len(self.trainAccuracy)))

        self.numberPoints.append(self.grid.getSize())

    def refineGrid(self):
        """
        Refines grid with the number of points as specified in corresponding
        TrainingSpecification object
        """
        self.notifyEventControllers(LearnerEvents.REFINING_GRID)
        refinableNum = self.grid.createGridGenerator(
        ).getNumberOfRefinablePoints()
        pointsNum = self.getNumOfPointsToRefine(refinableNum)
        # Refinement is steered by the per-basis-function errors from the
        # most recent evalError() call.
        functor = SurplusRefinementFunctor(self.errors, pointsNum,
                                           self.getAdaptThreshold())
        self.grid.createGridGenerator().refine(functor)
Exemplo n.º 17
0
class TestDataVector(unittest.TestCase):

    ##
    # Set up, create random DataVector and corresponding Python data structures.
    # @test DataVector::get(), DataVector::set()
    def setUp(self):
        """
        Build the random fixture used by the tests.

        Creates an nrows x ncols list of lists with random values in
        [-1, 1), its flattened copy, and a DataVector holding the flattened
        values; then checks that every element reads back unchanged.
        """
        from pysgpp import DataVector
        import random

        ## number of rows
        self.nrows = 5
        ## number of columns
        self.ncols = 4
        ## number of entries
        self.N = self.nrows * self.ncols
        ## random list of lists
        # range instead of the Python-2-only xrange; works on both versions.
        self.l_rand = [[
            2 * (random.random() - 0.5) for _ in range(self.ncols)
        ] for _ in range(self.nrows)]
        ## same as l_rand, but flattened
        self.l_rand_total = [value for row in self.l_rand for value in row]
        ## Data Vector, corresponding to l_rand
        self.d_rand = DataVector(self.N)
        for i, value in enumerate(self.l_rand_total):
            self.d_rand[i] = value

        # DataVector::get() must return exactly what DataVector::set() stored.
        for i, value in enumerate(self.l_rand_total):
            self.assertEqual(self.d_rand[i], value)

    ##
    # Constructors.
    # @test DataVector::DataVector(size_t size), DataVector::DataVector(size_t size, size_t dim), DataVector::DataVector(DataVectorDefinition &DataVectorDef), DataVector::getSize(), DataVector::getDim(), DataVector::getSize()
    # @todo (pflueged) DataVector::DataVector(double *input, size_t size, size_t dim)
    def testConstructor(self):
        """Check that a DataVector built from a size reports that length."""
        from pysgpp import DataVector

        vector = DataVector(2)
        reportedLength = len(vector)  # getSize()
        self.assertEqual(reportedLength, 2)

#        d = DataVector(2,3)
#        self.assertEqual(d.getSize(), 2)
#        self.assertEqual(d.getDim(), 3)
#        self.assertEqual(len(d), 2*3) # getSize()
#
#        d2 = DataVector(self.d_rand)
#        for i in xrange(self.N):
#            self.assertEqual(d2[i], self.d_rand[i])
#        self.assertEqual(d2.getSize(), self.nrows)
#        self.assertEqual(d2.getDim(), self.ncols)
#        self.assertEqual(len(d2), self.N)
#        d2[self.ncols] = -4.0
#        self.assertNotEqual(d2[self.ncols], self.d_rand[self.ncols])

##
# Min, Max operations.
# @test DataVector::min(int d), DataVector::max(int d), DataVector::minmax(int d, double *min, double *max), DataVector::min(), DataVector::max()

    def testMinMax(self):

        #        # test dimension-dependent min, max
        #        for j in xrange(self.ncols):
        #            minj = min([self.l_rand[i][j] for i in xrange(self.nrows)])
        #            maxj = max([self.l_rand[i][j] for i in xrange(self.nrows)])
        #            self.assertEqual(self.d_rand.min(j), minj)
        #            self.assertEqual(self.d_rand.max(j), maxj)
        #            mi, ma = self.d_rand.minmax(j)
        #            self.assertEqual(mi, minj)
        #            self.assertEqual(ma, maxj)

        # test global min, max
        self.assertEqual(self.d_rand.min(), min(self.l_rand_total))
        self.assertEqual(self.d_rand.max(), max(self.l_rand_total))

    ##
    # Operations on DataVectors.
    # @test DataVector::sum(), DataVector::sqr(), DataVector::abs(), DataVector::componentwise_mult(), DataVector::componentwise_div()
    def testOps(self):
        """
        Check sum(), sqr(), abs(), componentwise_mult() and
        componentwise_div() against the same operations done in Python on
        the reference values. Each in-place operation is applied to a fresh
        copy of self.d_rand.
        """
        from pysgpp import DataVector
        # range replaces the Python-2-only xrange throughout.
        indices = range(self.N)

        # sum
        self.assertAlmostEqual(self.d_rand.sum(), sum(self.l_rand_total))

        # sqr
        d = DataVector(self.d_rand)
        d.sqr()
        for i in indices:
            self.assertEqual(self.d_rand[i]**2, d[i])

        # abs
        d = DataVector(self.d_rand)
        d.abs()
        for i in indices:
            self.assertEqual(abs(self.d_rand[i]), d[i])

        # componentwise_mult: multiply entry i by i
        d = DataVector(self.d_rand)
        d2 = DataVector(self.N)
        for i in indices:
            d2[i] = i
        d.componentwise_mult(d2)
        for i in indices:
            self.assertEqual(self.d_rand[i] * i, d[i])

        # componentwise_div: divide entry i by i + 1 (never zero)
        d = DataVector(self.d_rand)
        for i in indices:
            d2[i] = i + 1
        d.componentwise_div(d2)
        for i in indices:
            self.assertEqual(self.d_rand[i] / (i + 1), d[i])

    ##
    # Vector-Operations
    # @test DataVector::dotProduct(DataVector &vec)
    def testDotProduct(self):
        """
        Check dotProduct() of the vector (1, 2, 3) with itself against the
        sum of squares accumulated in Python.
        """
        from pysgpp import DataVector

        x = 0

        d = DataVector(3)
        # range replaces the Python-2-only xrange.
        for i in range(len(d)):
            d[i] = i + 1
            x += d[i] * d[i]

        self.assertEqual(d.dotProduct(d), x)