def testSaveAllLoadAll(self):
        # saveAll() and loadAll() are exercised together: the point of the
        # test is that whatever gets stored can be restored accurately.
        learnerBuilder = LearnerBuilder()

        writer = CheckpointController("saveload", pathlocal)

        # Storing of the grid and of the knowledge is covered by other tests,
        # so only the learner as a whole is checked here.
        trained = (
            learnerBuilder.buildClassifier()
            .withTrainingDataFromARFFFile(pathlocal + "/traindata.arff")
            .withGrid().withLevel(2).withBorder(100)
            .withSpecification().withIdentityOperator().withLambda(0.00001).withAdaptPoints(2)
            .withStopPolicy().withAdaptiveItarationLimit(1)
            .withCGSolver().withImax(500)
            .withCheckpointController(writer)
            .andGetResult()
        )
        trained.learnData()

        writer.setLearner(trained)
        writer.saveAll(0)

        # Loading goes through a freshly constructed controller, not the one
        # that wrote the checkpoint.
        del writer

        reader = CheckpointController("saveload", pathlocal)
        restored = reader.loadAll(0)

        # Quick and dirty comparison via the string representations; this is
        # only meaningful as long as toString() itself works properly.
        self.assertEqual(trained.toString(), restored.toString())
    def testLoadLearnedKnowledge(self):
        # Load the knowledge stored under the "sample" checkpoint (iteration 0)
        # and compare its alphas with the reference coefficients below.
        controller = CheckpointController("sample", pathlocal)
        learnedKnowledge = controller.loadLearnedKnowledge(0)

        testValues = [-0.0310651210442,
                      -0.618841896127,
                       0.649230972775,
                       0.649230972775,
                      -0.618841896127]

        # Fetch the alpha vector once instead of on every loop iteration.
        alphas = learnedKnowledge.getAlphas()
        self.assertEqual(len(testValues), len(alphas))

        # enumerate() replaces xrange(), which does not exist on Python 3.
        # The vector is still accessed by index, exactly as before.
        for i, expected in enumerate(testValues):
            self.assertAlmostEqual(expected, alphas[i])
    def testLoadGrid(self):
        # Reference grid: linear grid of the given dimension, made regular
        # at the given level.
        dimension = 2
        gridLevel = 2
        expectedGrid = Grid.createLinearGrid(dimension)
        gridGenerator = expectedGrid.createGridGenerator()
        gridGenerator.regular(gridLevel)

        # Grid stored under the "sample" checkpoint, iteration 0.
        loader = CheckpointController("sample", pathlocal)
        loadedGrid = loader.loadGrid(0)

        # Dimension and number of grid points have to agree ...
        self.assertEqual(dimension, loadedGrid.getStorage().dim())
        self.assertEqual(expectedGrid.getStorage().size(),
                         loadedGrid.getStorage().size())

        # ... and equal serialized forms mean the grids themselves are equal.
        self.assertEqual(expectedGrid.serialize(), loadedGrid.serialize())
    def testSaveLearnedKnowledge(self):
        # Store a known alpha vector through the checkpoint controller, then
        # read the written gzip'ed ARFF file back and compare the values.
        testValues = [-0.0310651210442,
                      -0.618841896127,
                       0.649230972775,
                       0.649230972775,
                      -0.618841896127]
        alpha = DataVector(len(testValues))
        # enumerate() replaces xrange(), which does not exist on Python 3.
        for i, value in enumerate(testValues):
            alpha[i] = value

        learnedKnowledge = LearnedKnowledge()
        learnedKnowledge.update(alpha)

        controller = CheckpointController("saveknowledge", pathlocal)
        controller.setLearnedKnowledge(learnedKnowledge)
        controller.saveLearnedKnowledge(0)

        sampleLines = []
        f = gzip.open(pathlocal + "/saveknowledge.0.arff.gz", "r")
        try:
            for line in f:
                # gzip yields bytes on Python 3; without decoding, the
                # '"@" not in line' test below raises TypeError. On Python 2
                # the line already is a str and is left untouched.
                if not isinstance(line, str):
                    line = line.decode("utf-8")
                # Skip (near-)empty lines and ARFF header lines ("@...").
                if len(line) > 1 and "@" not in line:
                    sampleLines.append(float(line))
        finally:
            f.close()

        # sampleLines already holds floats, no second conversion is needed.
        self.assertEqual(testValues, sampleLines)
    def testSaveGrid(self):
        # Build a regular linear grid, save it through the checkpoint
        # controller and compare the written file with the grid's own
        # serialization.
        dimension = 2
        gridLevel = 2
        referenceGrid = Grid.createLinearGrid(dimension)
        gridGenerator = referenceGrid.createGridGenerator()
        gridGenerator.regular(gridLevel)

        saver = CheckpointController("savegrid", pathlocal)
        saver.setGrid(referenceGrid)
        saver.saveGrid(0)

        # The file is closed again even if reading fails.
        with gzip.open(pathlocal + "/savegrid.0.grid.gz", "r") as savedFile:
            storedText = savedFile.read()

        self.assertEqual(referenceGrid.serialize(), storedText)
Пример #6
0
    def constructObjectsFromOptions(cls, options):
        """Configure a learner from parsed options and run the requested action.

        The learner builder is set up step by step (learner type, data sets,
        grid, training specification, stop policy, CG solver, progress
        presenters, checkpoint controller, folding policy); afterwards the
        action selected by ``options.mode`` is performed.

        Args:
            options: object exposing the parsed options as attributes
                (``regression``, ``data``, ``mode``, ``level``, ...).
                ``options.mode`` is replaced by its lower-case form.

        Raises:
            Exception: if no training data file is given, the grid arguments
                are rejected, the regularization operator is unknown, or the
                mode is unknown or not implemented ('eval', 'evalstdin').
        """
        # Create builder for specified learner
        builder = LearnerBuilder()
        if options.regression:
            builder.buildRegressor()
        else:
            builder.buildClassifier()

        # load alpha file
        if options.alpha:
            builder.withInitialAlphaFromARFFFile(options.alpha)

        # dataset options
        if len(options.data) == 1:
            builder.withTrainingDataFromARFFFile(options.data[0])
        elif len(options.data) > 1:
            # Several training files: each one gets its own numbered category.
            for fileCounter, filename in enumerate(options.data):
                builder.withTrainingDataFromARFFFile(
                    filename, DataContainer.TRAIN_CATEGORY + str(fileCounter))
        else:
            raise Exception('Define the path to the training data set')

        if options.test:
            builder.withTestingDataFromARFFFile(options.test)

        # grid options
        builder = builder.withGrid()
        if options.grid:
            builder.fromFile(options.grid)
        else:
            try:
                if options.level:
                    builder.withLevel(options.level)
                if options.polynom:
                    builder.withPolynomialBase(options.polynom)
                if options.trapezoidboundary:
                    builder.withBorder(BorderTypes.TRAPEZOIDBOUNDARY)
                elif options.completeboundary:
                    builder.withBorder(BorderTypes.COMPLETEBOUNDARY)
                    # @fixme (khakhutv) the name "NONE" for the border type
                    # should be changed to something more meaningful
                elif options.border:
                    builder.withBorder(BorderTypes.NONE)
            except Exception:
                # "except Exception" instead of a bare "except" so that
                # KeyboardInterrupt / SystemExit are not rewritten into a
                # configuration error.
                raise Exception('Grid configuration arguments incorrect')

        if options.adapt_start:
            builder.withStartingIterationNumber(options.adapt_start)

        # training specification
        builder = builder.withSpecification()
        if options.adapt_rate:
            builder.withAdaptRate(options.adapt_rate)
        elif options.adapt_points:
            builder.withAdaptPoints(options.adapt_points)

        if options.regparam:
            builder.withLambda(options.regparam)
        if options.zeh:
            if options.zeh == 'laplace':
                builder.withLaplaceOperator()
            elif options.zeh == 'identity':
                builder.withIdentityOperator()
            else:
                raise Exception('Incorrect regulariation operator type')

        if options.adapt_threshold:
            builder.withAdaptThreshold(options.adapt_threshold)

        # stop policy
        builder = builder.withStopPolicy()
        if options.adaptive:
            builder.withAdaptiveItarationLimit(options.adaptive)
        if options.grid_limit:
            builder.withGridSizeLimit(options.grid_limit)
        if options.mse_limit:
            # Previously read the misspelled attribute "mse_limi", which
            # raised AttributeError whenever an MSE limit was requested.
            builder.withMSELimit(options.mse_limit)
        if options.epochs_limit:
            builder.withEpochsLimit(options.epochs_limit)

        # linear solver
        builder = builder.withCGSolver()
        if options.r:
            builder.withAccuracy(options.r)
        if options.max_r:
            builder.withThreshold(options.max_r)
        if options.imax:
            builder.withImax(options.imax)

        # presenter
        if options.verbose:  # print to the screen
            if options.regression:
                builder.withProgressPresenter(InfoToScreenRegressor())
            else:
                builder.withProgressPresenter(InfoToScreen())
        if options.stats:
            builder.withProgressPresenter(InfoToFile(options.stats))

        # checkpoint
        if options.checkpoint:
            title = os.path.basename(options.checkpoint)
            path = os.path.dirname(options.checkpoint)
            checkpointController = CheckpointController(title, path)

            builder.withCheckpointController(checkpointController)

        # Normalize the mode before its first use. It used to be lower-cased
        # only after the folding check below, so e.g. "Fold" skipped the
        # folding policy setup but still reached the fold action.
        options.mode = options.mode.lower()
        foldModes = ('fold', 'folds', 'foldstratified', 'foldf', 'foldr')

        # Folding
        if options.mode in foldModes:
            if options.mode == 'fold':
                builder.withRandomFoldingPolicy()
            elif options.mode == 'folds':
                builder.withSequentialFoldingPolicy()
            elif options.mode in ('foldstratified', 'foldr'):
                builder.withStratifiedFoldingPolicy()
            elif options.mode == 'foldf':
                builder.withFilesFoldingPolicy()
            if options.seed:
                builder.withSeed(options.seed)
            if options.level:
                builder.withLevel(options.level)

        # Get Results and perform wanted action
        learner = builder.andGetResult()
        if options.mode == 'normal':
            learner.learnData()
        elif options.mode == 'test':
            learner.learnDataWithTest()
        elif options.mode == 'apply':
            # NOTE(review): options.data is handled as a list above, yet the
            # whole list is passed to ARFFAdapter here - confirm whether a
            # single file name (e.g. options.data[0]) is expected.
            learner.applyData(ARFFAdapter(options.data).loadData().getPoints())
        elif options.mode in foldModes:
            builder.getCheckpointController().generateFoldValidationJob(
                'PUT_YOUR_EMAIL_HERE')
        elif options.mode in ('eval', 'evalstdin'):
            raise Exception('This action is not implemented yet')
        else:
            raise Exception('Incorrect action configuration')
Пример #7
0
    def constructObjectsFromFile(cls, filename):
        """Configure a learner from a job configuration file and run its action.

        The file is parsed with ``configparser``. The sections 'learner',
        'data', 'grid', 'refinement', 'solver', 'output', 'checkpoints' and
        'folding' are read in turn to set up the learner builder; then the
        action named in the 'learner' section ('learn', 'apply' or 'fold')
        is performed.

        Args:
            filename: path of the job configuration file.

        Raises:
            Exception: on an unknown learner type, data type, regularization
                operator, folding type or action, on a missing training file
                or InfoToFile filename, on a rejected grid configuration, and
                on an invalid 'with_testing' value.
        """
        configuration = configparser.ConfigParser()
        # Close the file deterministically; it used to be left open.
        with open(filename, 'r') as configFile:
            # read_file() supersedes readfp(), which was removed in
            # Python 3.12; fall back to readfp() on parsers lacking it.
            readConfig = getattr(configuration, 'read_file', None)
            if readConfig is None:
                readConfig = configuration.readfp
            readConfig(configFile)

        # Create builder for specified learner
        builder = LearnerBuilder()
        learner_type = configuration.get('learner', 'type')
        if learner_type == 'classification':
            builder.buildClassifier()
        elif learner_type == 'regression':
            builder.buildRegressor()
        else:
            raise Exception('Wrong learner type in job configuration file')

        # dataset options
        options = TerminalController.itemsToDict(configuration.items('data'))

        if options['file_type'] == 'arff':
            if 'train_file' in options:
                trainFiles = options['train_file']
                # isinstance() instead of type(): a later loop variable named
                # "type" made "type" a local of this function, so calling it
                # here raised UnboundLocalError.
                if not isinstance(trainFiles, list):
                    builder.withTrainingDataFromARFFFile(trainFiles)
                else:
                    # Several training files: one numbered category each.
                    for fileCounter, trainFile in enumerate(trainFiles):
                        builder.withTrainingDataFromARFFFile(
                            trainFile,
                            DataContainer.TRAIN_CATEGORY + str(fileCounter))

            else:
                raise Exception(
                    'Path to file with training data set is not defined in configurationfile'
                )

            if 'test_file' in options:
                builder.withTestingDataFromARFFFile(options['test_file'])

        else:
            raise Exception('Unsupported data type in job configuration file')

        # grid options
        builder = builder.withGrid()
        options = TerminalController.itemsToDict(configuration.items('grid'))

        if 'grid_file' in options:
            builder.fromFile(options['grid_file'])
        else:
            try:
                if 'level' in options:
                    builder.withLevel(int(options['level']))
                if 'polynomial' in options:
                    builder.withPolynomialBase(int(options['polynomial']))
                if 'border' in options:
                    builder.withBorder(options['border'])
            except Exception:
                # "except Exception" keeps KeyboardInterrupt / SystemExit
                # from being rewritten into a configuration error.
                raise Exception('Grid configuration in job file is incorrect')

        # training specification
        builder = builder.withSpecification()
        options = TerminalController.itemsToDict(
            configuration.items('refinement'))

        if 'points' in options:
            # A value containing '.' is taken as a rate, otherwise as a count.
            if '.' in options['points']:
                builder.withAdaptRate(float(options['points']))
            else:
                builder.withAdaptPoints(int(options['points']))

        options = TerminalController.itemsToDict(
            configuration.items('learner'))

        if 'regularization_parameter' in options:
            builder.withLambda(float(options['regularization_parameter']))
        if 'regularization_operator' in options:
            if options['regularization_operator'] == 'laplace':
                builder.withLaplaceOperator()
            # Was compared against the misspelling 'idenitty', so the
            # identity operator could never be selected.
            elif options['regularization_operator'] == 'identity':
                builder.withIdentityOperator()
            else:
                raise Exception('Incorrect regulariation operator type')

        if 'threshold' in options:
            builder.withAdaptThreshold(float(options['threshold']))

        # stop policy
        builder = builder.withStopPolicy()
        options = TerminalController.itemsToDict(
            configuration.items('refinement'))

        if 'iterations' in options:
            builder.withAdaptiveItarationLimit(int(options['iterations']))
        if 'gridsize' in options:
            builder.withGridSizeLimit(int(options['gridsize']))
        if 'mse' in options:
            builder.withMSELimit(float(options['mse']))
        if 'epochs' in options:
            builder.withEpochsLimit(int(options['epochs']))

        # linear solver
        builder = builder.withCGSolver()
        options = TerminalController.itemsToDict(configuration.items('solver'))

        if 'accuracy' in options:
            builder.withAccuracy(float(options['accuracy']))
        if 'imax' in options:
            builder.withImax(int(options['imax']))
        if 'max_threshold' in options:
            builder.withThreshold(float(options['max_threshold']))

        # presenter
        options = TerminalController.itemsToDict(configuration.items('output'))
        if 'type' in options:
            # Comma-separated list of presenter names. The loop variable no
            # longer shadows the builtin "type".
            for presenterName in options['type'].split(','):
                presenterName = presenterName.strip()
                if presenterName == 'InfoToScreen':
                    builder.withProgressPresenter(InfoToScreen())
                elif presenterName == 'InfoToScreenRegressor':
                    builder.withProgressPresenter(InfoToScreenRegressor())
                elif presenterName == 'InfoToFile':
                    if 'filename' in options:
                        builder.withProgressPresenter(
                            InfoToFile(options['filename']))
                    else:
                        raise Exception(
                            'Define filename in order to use InfoToFile output'
                        )

        # checkpoint
        options = TerminalController.itemsToDict(
            configuration.items('checkpoints'))

        # Stays None unless a learner is restored from a checkpoint below.
        learner = None
        if 'name' in options:
            path = options['path'] if 'path' in options else None
            # The key tested here used to be the garbled 'inte    rval', so a
            # configured interval was always ignored.
            interval = options['interval'] if 'interval' in options else None
            checkpointController = CheckpointController(
                options['name'], path, interval)
            if 'restore_iteration' in options:
                # NOTE(review): the value is passed on as read from the file
                # (a string) - confirm loadAll() does not need an int.
                learner = checkpointController.loadAll(
                    options['restore_iteration'])

            builder.withCheckpointController(checkpointController)

        # Get Results and perform wanted action
        # If the learner was not restored by the checkpoint controller,
        # create it with the learner builder. The former check
        # "learner not in dir()" was true for a restored learner as well and
        # therefore always replaced it.
        if learner is None:
            learner = builder.andGetResult()

        # Folding
        # NOTE(review): the folding policy is applied to the builder after
        # the learner has been created, and an unknown or missing folding
        # type aborts every action - confirm that both are intended.
        options = TerminalController.itemsToDict(
            configuration.items('folding'))
        # Key lookups replace the former attribute access (options.type,
        # options.seed, options.level) on the options dictionary.
        foldingType = options['type'] if 'type' in options else None
        if foldingType in ('fold', 'folds', 'foldstratified', 'foldr',
                           'foldf'):
            if foldingType == 'fold':
                builder.withRandomFoldingPolicy()
            elif foldingType == 'folds':
                builder.withSequentialFoldingPolicy()
            elif foldingType in ('foldstratified', 'foldr'):
                builder.withStratifiedFoldingPolicy()
            elif foldingType == 'foldf':
                builder.withFilesFoldingPolicy()
            if 'seed' in options and options['seed']:
                # NOTE(review): passed on as read from the file - confirm
                # whether withSeed() needs a numeric value.
                builder.withSeed(options['seed'])
            if 'level' in options and options['level']:
                # Converted like the grid level above.
                builder.withLevel(int(options['level']))
        else:
            raise Exception('Unknown folding type')

        options = TerminalController.itemsToDict(
            configuration.items('learner'))
        if options['action'] == 'learn':
            if ('with_testing'
                    not in options) or options['with_testing'].lower() == 'no':
                learner.learnData()
            elif options['with_testing'].lower() == 'yes':
                learner.learnDataWithTest()
            else:
                raise Exception(
                    'with_testion can only be set to "yes" or "no"')
        elif options['action'] == 'apply':
            points_file = configuration.get('data', 'points_file')
            if points_file is not None:
                learner.applyData(
                    ARFFAdapter(points_file).loadData().getPoints())
            else:
                raise Exception(
                    'To evaluate value of points define file path "points_file" in the section "data"'
                )
        elif options['action'] == 'fold':
            builder.getCheckpointController().generateFoldValidationJob(
                'PUT_YOUR_EMAIL_HERE')
        else:
            raise Exception('Incorrect action in job configuration file')