Пример #1
0
    def learn_and_test_on_test_data(self,
                                    learners,
                                    learn_set,
                                    test_set,
                                    preprocessors=(),
                                    callback=None,
                                    store_classifiers=False,
                                    store_examples=False):
        """
        Train learners on one dataset and evaluate them on another.

        :param learners: list of learners to be tested
        :param learn_set: dataset the learners are trained on
        :param test_set: dataset the trained classifiers are tested on
        :param preprocessors: a list of preprocessors to be used on data.
        :param callback: a function that is called after each classifier is
               computed (forwarded to ``_learn_and_test_on_test_data``).
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, the test examples will be accessible
               in test_results.
        :return: :obj:`ExperimentResults`
        """
        train_data, train_weight = demangle_examples(learn_set)
        eval_data, eval_weight = demangle_examples(test_set)

        # Both datasets are checked against the learners; only the value
        # obtained for the training data is recorded in the results.
        test_type = self.check_test_type(train_data, learners)
        self.check_test_type(eval_data, learners)

        names = [getobjectname(learner) for learner in learners]
        test_results = ExperimentResults(1,
                                         classifier_names=names,
                                         domain=eval_data.domain,
                                         test_type=test_type,
                                         weights=eval_weight)

        # One result slot per test example, all belonging to iteration 0.
        slots = []
        for example in eval_data:
            slots.append(test_results.create_tested_example(0, example))
        test_results.results = slots

        classifiers, outcomes = self._learn_and_test_on_test_data(
            learners, train_data, train_weight, eval_data, preprocessors,
            callback)

        if store_classifiers:
            test_results.classifiers = classifiers
        if store_examples:
            test_results.examples = eval_data

        # Each outcome is (example index, classifier index, result tuple).
        for example_idx, classifier_idx, outcome in outcomes:
            slot = test_results.results[example_idx]
            slot.set_result(classifier_idx, *outcome)
        return test_results
Пример #2
0
    def leave_one_out(self,
                      learners,
                      examples,
                      preprocessors=(),
                      callback=None,
                      store_classifiers=False,
                      store_examples=False):
        """Evaluate learners with the leave-one-out scheme.

        Implemented by delegating to :obj:`test_with_indices` with a
        distinct index for every example.

        :param learners: list of learners to be tested
        :param examples: data table on which the learners will be tested
        :param preprocessors: a list of preprocessors to be used on data.
        :param callback: a function that will be called after each fold is
               computed.
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, examples will be accessible in
               test_results.
        :return: :obj:`ExperimentResults`
        """
        data, weight_id = demangle_examples(examples)

        # Index k is assigned to example k, so there are as many folds as
        # there are examples.
        per_example_fold = range(len(data))

        return self.test_with_indices(
            learners,
            (data, weight_id),
            indices=per_example_fold,
            preprocessors=preprocessors,
            callback=callback,
            store_classifiers=store_classifiers,
            store_examples=store_examples)
Пример #3
0
    def learn_and_test_on_learn_data(self,
                                     learners,
                                     examples,
                                     preprocessors=(),
                                     callback=None,
                                     store_classifiers=False,
                                     store_examples=False):
        """
        Perform a test where learners are trained and tested on the same data.

        :param learners: list of learners to be tested
        :param examples: data table on which the learners will be trained and
               tested
        :param preprocessors: a list of preprocessors to be used on data. The
               first item of each entry is its type flag (e.g. ``"B"``).
        :param callback: a function forwarded to ``_train_with_callback``.
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, the (preprocessed) test examples will
               be accessible in test_results.
        :return: :obj:`ExperimentResults`
        """

        examples, weight = demangle_examples(examples)
        test_type = self.check_test_type(examples, learners)

        # When no preprocessor has a type other than "B" (this includes the
        # case of no preprocessors at all), the preprocessed learn set is
        # reused as the test set. Otherwise a copy of the data is preprocessed
        # separately as the test set.
        # NOTE: ``any`` is used instead of the truthiness of ``filter(...)``,
        # because on Python 3 ``filter`` returns an iterator that is always
        # truthy, which would make the first branch unreachable.
        has_non_b_preprocessor = any(p[0] != "B" for p in preprocessors)
        if not has_non_b_preprocessor:
            learn_set, test_set = self._preprocess_data(
                examples, Orange.data.Table(examples.domain), preprocessors)
            test_set = learn_set
        else:
            learn_set, test_set = self._preprocess_data(
                examples, Orange.data.Table(examples), preprocessors)

        classifiers = self._train_with_callback(learners, learn_set, weight,
                                                callback)

        test_results = ExperimentResults(
            1,
            classifier_names=[getobjectname(l) for l in learners],
            test_type=test_type,
            domain=examples.domain,
            weights=weight)
        # One result slot per original example, all in iteration 0.
        test_results.results = [
            test_results.create_tested_example(0, example)
            for example in examples
        ]

        if store_classifiers:
            test_results.classifiers = classifiers
        if store_examples:
            test_results.examples = test_set

        # Each entry is (example index, classifier index, result tuple).
        results = self._test_on_data(classifiers, test_set)
        for example, classifier, result in results:
            test_results.results[example].set_result(classifier, *result)
        return test_results
Пример #4
0
    def proportion_test(self,
                        learners,
                        data,
                        learning_proportion,
                        times=10,
                        stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                        preprocessors=(),
                        random_generator=0,
                        callback=None,
                        store_classifiers=False,
                        store_examples=False,
                        testAttrFilter=None,
                        testFilterVal=None):
        """
        Repeatedly split the data into a training and a test part and evaluate
        the learners on each split.

        :param learners: list of learners to be tested
        :param data: a dataset used for evaluation
        :param learning_proportion: proportion of examples to be used for training
        :param times: number of test repetitions
        :param stratification: use stratification when constructing train and test sets.
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: passed to the index generator as ``randomGenerator``.
        :param callback: a function that is called (without arguments) after each repetition.
        :param store_classifiers: if True, classifiers will be accessible in test_results.
        :param store_examples: accepted for interface compatibility; not used by this method.
        :param testAttrFilter: forwarded to ``getExamplesAndSetTrainBias``.
        :param testFilterVal: forwarded to ``getExamplesAndSetTrainBias``.
        :return: :obj:`ExperimentResults`
        """
        filtered = self.getExamplesAndSetTrainBias(data, testAttrFilter,
                                                   testFilterVal)

        splitter = Orange.core.MakeRandomIndices2(
            stratified=stratification,
            p0=learning_proportion,
            randomGenerator=random_generator)

        table, weight = demangle_examples(filtered)
        test_type = self.check_test_type(table, learners)

        learner_names = [getobjectname(learner) for learner in learners]
        test_results = orngTest.ExperimentResults(times,
                                                  classifierNames=learner_names,
                                                  domain=table.domain,
                                                  test_type=test_type,
                                                  weights=weight)
        test_results.classifiers = []

        # Position in test_results.results where the current repetition's
        # tested examples start.
        first_slot = 0
        for repetition in xrange(times):
            split = splitter(table)
            train_part = table.selectref(split, 0)
            test_part = table.selectref(split, 1)

            # Add the train bias (if any) to the training part.
            if self.trainBias:
                merged = dataUtilities.concatenate(
                    [train_part, self.trainBias], True)
                train_part = merged[0]

            classifiers, outcomes = self._learn_and_test_on_test_data(
                learners, train_part, weight, test_part, preprocessors)
            if store_classifiers:
                test_results.classifiers.append(classifiers)

            test_results.results.extend(
                test_results.create_tested_example(repetition, example)
                for example in test_part)
            # Outcome indices are relative to test_part, hence the shift.
            for example_idx, classifier_idx, outcome in outcomes:
                slot = test_results.results[first_slot + example_idx]
                slot.set_result(classifier_idx, *outcome)
            first_slot += len(test_part)

            if callback:
                callback()
        return test_results
Пример #5
0
    def learning_curve(self,
                       learners,
                       examples,
                       cv_indices=None,
                       proportion_indices=None,
                       proportions=Orange.core.frange(0.1),
                       preprocessors=(),
                       random_generator=0,
                       callback=None):
        """
        Compute a learning curve using multiple cross-validations where
        models are trained on different portions of the training data.

        :param learners: list of learners to be tested
        :param examples: a dataset used for evaluation
        :param cv_indices: callable producing the cross-validation indices
               for the data; defaults to a 10-fold
               ``MakeRandomIndicesCV``.
        :param proportion_indices: callable producing indices for proportion
               selection; defaults to a ``MakeRandomIndices2``.
        :param proportions: proportions of train data to be used
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: passed as ``randomGenerator`` to the default
               index generators.
        :param callback: a function that is called after each fold is
               computed (forwarded to :obj:`test_with_indices`).
        :return: list of :obj:`ExperimentResults`, one per proportion
        """
        if cv_indices is None:
            cv_indices = Orange.core.MakeRandomIndicesCV(
                folds=10,
                stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=random_generator)
        if proportion_indices is None:
            proportion_indices = Orange.core.MakeRandomIndices2(
                stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible,
                randomGenerator=random_generator)

        examples, weight = demangle_examples(examples)
        indices = cv_indices(examples)

        all_results = []
        for p in proportions:

            # Learn-set preprocessor ("L") that keeps only the selected
            # proportion of each fold's training data. The closure over ``p``
            # is consumed by the test_with_indices call in this same
            # iteration.
            def select_proportion_preprocessor(learn_data):
                return learn_data.selectref(
                    proportion_indices(learn_data, p0=p), 0)

            # Pass the data together with its weight, as cross_validation and
            # leave_one_out do; passing the bare table would silently drop
            # the weight obtained from demangle_examples above.
            test_results = self.test_with_indices(
                learners,
                (examples, weight),
                indices,
                preprocessors=list(preprocessors) +
                [("L", select_proportion_preprocessor)],
                callback=callback)
            all_results.append(test_results)
        return all_results
Пример #6
0
    def cross_validation(
            self,
            learners,
            examples,
            folds=10,
            stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible,
            preprocessors=(),
            random_generator=0,
            callback=None,
            store_classifiers=False,
            store_examples=False):
        """Run a cross-validation of the learners on the given data.

        If ``self.fixedIdx`` is set, it is used as the fold assignment and
        ``folds``, ``stratified`` and ``random_generator`` are ignored.

        :param learners: list of learners to be tested
        :param examples: data table on which the learners will be tested
        :param folds: number of folds to perform
        :param stratified: sets, whether indices should be stratified
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: random seed or random generator for selection
               of indices
        :param callback: a function that will be called after each fold is
               computed.
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, examples will be accessible in
               test_results.
        :return: :obj:`ExperimentResults`
        """
        data, weight_id = demangle_examples(examples)

        # Prefer the externally fixed fold assignment; generate random folds
        # only when none is provided.
        fold_indices = self.fixedIdx
        if not fold_indices:
            fold_indices = Orange.core.MakeRandomIndicesCV(
                data,
                folds,
                stratified=stratified,
                random_generator=random_generator)

        return self.test_with_indices(
            learners=learners,
            examples=(data, weight_id),
            indices=fold_indices,
            preprocessors=preprocessors,
            callback=callback,
            store_classifiers=store_classifiers,
            store_examples=store_examples)
Пример #7
0
    def cross_validation(self, learners, examples, folds=10,
            stratified=Orange.core.MakeRandomIndices.StratifiedIfPossible,
            preprocessors=(), random_generator=0, callback=None,
            store_classifiers=False, store_examples=False):
        """Cross-validate the learners on a data table.

        When ``self.fixedIdx`` is truthy it supplies the fold indices and the
        ``folds``, ``stratified`` and ``random_generator`` arguments have no
        effect.

        :param learners: list of learners to be tested
        :param examples: data table on which the learners will be tested
        :param folds: number of folds to perform
        :param stratified: sets, whether indices should be stratified
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: random seed or random generator for selection
               of indices
        :param callback: a function that will be called after each fold is
               computed.
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, examples will be accessible in
               test_results.
        :return: :obj:`ExperimentResults`
        """
        table, weight_id = demangle_examples(examples)

        if not self.fixedIdx:
            # No predefined assignment: draw random fold indices.
            assignment = Orange.core.MakeRandomIndicesCV(
                table, folds,
                stratified=stratified,
                random_generator=random_generator)
        else:
            # Predefined assignment wins; `folds` is ignored.
            assignment = self.fixedIdx

        shared_options = dict(
            preprocessors=preprocessors,
            callback=callback,
            store_classifiers=store_classifiers,
            store_examples=store_examples)
        return self.test_with_indices(
            learners=learners,
            examples=(table, weight_id),
            indices=assignment,
            **shared_options)
Пример #8
0
    def test_on_data(self,
                     classifiers,
                     examples,
                     store_classifiers=False,
                     store_examples=False):
        """
        Evaluate already-built classifiers on a set of examples.

        :param classifiers: classifiers to test
        :param examples: examples to test on
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, examples will be accessible in
               test_results.
        :return: :obj:`ExperimentResults`
        """

        data, weight_id = demangle_examples(examples)
        test_type = self.check_test_type(data, classifiers)

        names = [getobjectname(classifier) for classifier in classifiers]
        test_results = ExperimentResults(1,
                                         classifier_names=names,
                                         domain=data.domain,
                                         test_type=test_type,
                                         weights=weight_id)

        # One result slot per example, all belonging to iteration 0.
        slots = []
        for example in data:
            slots.append(test_results.create_tested_example(0, example))
        test_results.results = slots

        if store_examples:
            test_results.examples = data
        if store_classifiers:
            test_results.classifiers = classifiers

        # Each outcome is (example index, classifier index, result tuple).
        for example_idx, classifier_idx, outcome in self._test_on_data(
                classifiers, data):
            slot = test_results.results[example_idx]
            slot.set_result(classifier_idx, *outcome)
        return test_results
Пример #9
0
    def proportion_test(
            self, learners, data, learning_proportion, times=10,
            stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible,
            preprocessors=(), random_generator=0, callback=None,
            store_classifiers=False, store_examples=False,
            testAttrFilter=None, testFilterVal=None):
        """
        Evaluate learners on repeated random train/test splits of the data.

        :param learners: list of learners to be tested
        :param data: a dataset used for evaluation
        :param learning_proportion: proportion of examples to be used for training
        :param times: number of test repetitions
        :param stratification: use stratification when constructing train and test sets.
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: passed to the index generator as ``randomGenerator``.
        :param callback: a function that is called (without arguments) after each repetition.
        :param store_classifiers: if True, classifiers will be accessible in test_results.
        :param store_examples: accepted for interface compatibility; not used by this method.
        :param testAttrFilter: forwarded to ``getExamplesAndSetTrainBias``.
        :param testFilterVal: forwarded to ``getExamplesAndSetTrainBias``.
        :return: :obj:`ExperimentResults`
        """
        selected = self.getExamplesAndSetTrainBias(
            data, testAttrFilter, testFilterVal)

        make_split = Orange.core.MakeRandomIndices2(
            stratified=stratification,
            p0=learning_proportion,
            randomGenerator=random_generator)

        dataset, weight_id = demangle_examples(selected)

        test_type = self.check_test_type(dataset, learners)

        names = [getobjectname(learner) for learner in learners]
        test_results = orngTest.ExperimentResults(
            times,
            classifierNames=names,
            domain=dataset.domain,
            test_type=test_type,
            weights=weight_id)
        test_results.classifiers = []

        # Index of the first result slot belonging to the current run.
        run_start = 0
        for run in xrange(times):
            split_indices = make_split(dataset)
            training = dataset.selectref(split_indices, 0)
            testing = dataset.selectref(split_indices, 1)

            # Extend the training data with the train bias when one is set.
            if self.trainBias:
                training = dataUtilities.concatenate(
                    [training, self.trainBias], True)[0]

            run_classifiers, run_results = \
                self._learn_and_test_on_test_data(
                    learners, training, weight_id, testing, preprocessors)
            if store_classifiers:
                test_results.classifiers.append(run_classifiers)

            test_results.results.extend(
                test_results.create_tested_example(run, example)
                for example in testing)
            # Result indices are local to `testing`; shift them to this
            # run's slots.
            for local_idx, classifier_idx, outcome in run_results:
                target = test_results.results[run_start + local_idx]
                target.set_result(classifier_idx, *outcome)
            run_start += len(testing)

            if callback:
                callback()
        return test_results
Пример #10
0
    def learning_curve_with_test_data(
            self,
            learners,
            learn_set,
            test_set,
            times=10,
            proportions=Orange.core.frange(0.1),
            stratification=Orange.core.MakeRandomIndices.StratifiedIfPossible,
            preprocessors=(),
            random_generator=0,
            store_classifiers=False,
            store_examples=False):
        """
        Compute a learning curve from a separate learn and test dataset.

        For every proportion, models are trained ``times`` times on a random
        portion of the learn set and each time used to make predictions for
        the whole test set.

        :param learners: list of learners to be tested
        :param learn_set: a dataset from which training portions are drawn
        :param test_set: a dataset used for evaluation
        :param times: number of repetitions per proportion
        :param proportions: proportions of train data to be used
        :param stratification: passed as ``stratified`` to the index generator
        :param preprocessors: a list of preprocessors to be used on data.
        :param random_generator: passed as ``randomGenerator`` to the index
               generator
        :param store_classifiers: if True, the classifiers of every repetition
               are appended to ``classifiers`` of the results.
        :param store_examples: if True, the training portion is stored in
               ``examples`` of the results (overwritten on each repetition).
        :return: list of :obj:`ExperimentResults`
        """
        train_data, train_weight = demangle_examples(learn_set)
        eval_data, eval_weight = demangle_examples(test_set)
        test_type = self.check_test_type(train_data, learners)
        self.check_test_type(eval_data, learners)

        pick_portion = Orange.core.MakeRandomIndices2(
            stratified=stratification, randomGenerator=random_generator)

        curve = []
        for p in proportions:
            names = [getobjectname(learner) for learner in learners]
            test_results = ExperimentResults(times,
                                             classifier_names=names,
                                             domain=eval_data.domain,
                                             test_type=test_type,
                                             weights=eval_weight)
            # Index of the first result slot of the current repetition.
            base = 0
            for repetition in xrange(times):
                test_results.results.extend(
                    test_results.create_tested_example(repetition, example)
                    for example in eval_data)

                portion = train_data.selectref(
                    pick_portion(train_data, p), 0)
                classifiers, outcomes = self._learn_and_test_on_test_data(
                    learners, portion, train_weight, eval_data,
                    preprocessors=preprocessors)

                # Outcome indices are relative to the test set.
                for example_idx, classifier_idx, outcome in outcomes:
                    slot = test_results.results[base + example_idx]
                    slot.set_result(classifier_idx, *outcome)
                base += len(eval_data)

                if store_classifiers:
                    test_results.classifiers.append(classifiers)
                if store_examples:
                    test_results.examples = portion

            curve.append(test_results)
        return curve
Пример #11
0
    def test_with_indices(self,
                          learners,
                          examples,
                          indices,
                          preprocessors=(),
                          callback=None,
                          store_classifiers=False,
                          store_examples=False,
                          **kwargs):
        """
        Run a cross-validation-like test with an explicit fold assignment.

        The number of folds is ``max(indices) + 1``; each fold is processed
        by :obj:`one_fold_with_indices`.

        :param learners: list of learners to be tested
        :param examples: data table on which the learners will be tested
        :param indices: a list of integers that defines, which examples will be
               used for testing in each fold. The number of indices should be
               equal to the number of examples.
        :param preprocessors: a list of preprocessors to be used on data.
        :param callback: a function that will be called (without arguments)
               after each fold is computed.
        :param store_classifiers: if True, classifiers will be accessible in
               test_results.
        :param store_examples: if True, examples will be accessible in
               test_results.
        :param kwargs: only inspected for the unsupported ``cache`` keyword.
        :return: :obj:`ExperimentResults`
        :raises ValueError: if there are no examples, or if ``cache`` is
                given.
        """
        data, weight_id = demangle_examples(examples)
        if not data:
            raise ValueError("Test data set with no examples")
        test_type = self.check_test_type(data, learners)
        if "cache" in kwargs:
            raise ValueError("This feature is no longer supported.")

        fold_count = max(indices) + 1
        names = [getobjectname(learner) for learner in learners]
        test_result = ExperimentResults(fold_count,
                                        classifier_names=names,
                                        domain=data.domain,
                                        weights=weight_id,
                                        test_type=test_type)

        # One result slot per example, tagged with the example's index value.
        slots = []
        for position, example in enumerate(data):
            slots.append(
                test_result.create_tested_example(indices[position], example))
        test_result.results = slots

        if store_examples:
            test_result.examples = data

        for fold in xrange(fold_count):
            fold_results, fold_classifiers = self.one_fold_with_indices(
                learners, data, fold, indices, preprocessors, weight_id)

            # Each entry is (example index, learner index, result tuple).
            for example_idx, learner_idx, outcome in fold_results:
                slot = test_result.results[example_idx]
                slot.set_result(learner_idx, *outcome)

            if store_classifiers:
                test_result.classifiers.append(fold_classifiers)
            if callback:
                callback()

        return test_result