def test_Compare(self):
     """
     Check the p-values reported by Combined5x2t for several classifier pairs.

     All experiments are executed by one shared MxKFoldRun(5, 2) instance. Each scenario holds two factories that
     build the experiments lazily, so the first experiment is built and executed before the second one is built,
     and each comparison is asserted before the next scenario starts.
     """
     foldRunner = MxKFoldRun(5, 2)
     significanceTest = Combined5x2t()
     # (first experiment factory, second experiment factory, expected p-value, decimal places)
     scenarios = (
         (lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris),
          lambda: Experiment(LinearPerceptron(),
                             LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                             self.iris),
          0.186, 3),
         (lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe),
          lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe),
          0.0000059, 7),
         (lambda: Experiment(Lda(), Parameter(1), self.dermatology),
          lambda: Experiment(LinearPerceptron(),
                             LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                             self.dermatology),
          0.9819, 4),
         (lambda: Experiment(Dummy(), Parameter(1), self.nursery),
          lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery),
          0.0, 4),
         (lambda: Experiment(NaiveBayes(), Parameter(1), self.car),
          lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car),
          0.00043, 5),
         (lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                             self.bupa),
          lambda: Experiment(Lda(), Parameter(1), self.bupa),
          0.0663, 4),
     )
     for buildFirst, buildSecond, expectedPValue, places in scenarios:
         firstPerformance = foldRunner.execute(buildFirst())
         secondPerformance = foldRunner.execute(buildSecond())
         self.assertAlmostEqual(
             expectedPValue,
             significanceTest.compare(firstPerformance,
                                      secondPerformance).getPValue(),
             places)
Пример #2
0
 def test_Compare(self):
     """
     Check the p-values reported by Pairedt for several classifier pairs.

     All experiments are executed by one shared KFoldRun(10) instance. Each scenario holds two factories that build
     the experiments lazily, so the first experiment is built and executed before the second one is built, and
     each comparison is asserted before the next scenario starts.
     """
     foldRunner = KFoldRun(10)
     significanceTest = Pairedt()
     # (first experiment factory, second experiment factory, expected p-value, decimal places)
     scenarios = (
         (lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris),
          lambda: Experiment(LinearPerceptron(),
                             LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                             self.iris),
          0.379, 3),
         (lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe),
          lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe),
          0.00000692, 7),
         (lambda: Experiment(Lda(), Parameter(1), self.dermatology),
          lambda: Experiment(LinearPerceptron(),
                             LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                             self.dermatology),
          0.7842, 4),
         (lambda: Experiment(Dummy(), Parameter(1), self.nursery),
          lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery),
          0.0, 4),
         (lambda: Experiment(NaiveBayes(), Parameter(1), self.car),
          lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car),
          0.00000336, 7),
         (lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                             self.bupa),
          lambda: Experiment(Lda(), Parameter(1), self.bupa),
          0.1640, 4),
     )
     for buildFirst, buildSecond, expectedPValue, places in scenarios:
         firstPerformance = foldRunner.execute(buildFirst())
         secondPerformance = foldRunner.execute(buildSecond())
         self.assertAlmostEqual(
             expectedPValue,
             significanceTest.compare(firstPerformance,
                                      secondPerformance).getPValue(),
             places)
 def test_Train(self):
     """
     Train one Bagging instance on each data set in turn and check its error rate on that same data.

     The same classifier object and the same BaggingParameter(1, 100) are reused for every data set. Error rates
     are expressed in percent and compared to 2 decimal places.
     """
     ensemble = Bagging()
     settings = BaggingParameter(1, 100)

     def errorPercentAfterTraining(dataSet):
         # Train and evaluate on the very same instances, fetching the instance list once for each call.
         ensemble.train(dataSet.getInstanceList(), settings)
         return 100 * ensemble.test(dataSet.getInstanceList()).getErrorRate()

     self.assertAlmostEqual(2.0, errorPercentAfterTraining(self.iris), 2)
     self.assertAlmostEqual(42.03, errorPercentAfterTraining(self.bupa), 2)
     self.assertAlmostEqual(2.46, errorPercentAfterTraining(self.dermatology), 2)
     self.assertAlmostEqual(0.0, errorPercentAfterTraining(self.car), 2)
     self.assertAlmostEqual(0.0, errorPercentAfterTraining(self.tictactoe), 2)
Пример #4
0
    def train(self, trainSet: InstanceList, parameters: BaggingParameter):
        """
        Learn an ensemble of decision trees, one tree for each bootstrap resampling of the training set, and store
        it in self.model as a TreeEnsembleModel.

        For every index in range(ensemble size) the method calls trainSet.bootstrap(index), wraps the drawn sample
        in a new InstanceList and builds a DecisionTree from a DecisionNode over that list. This method itself does
        not set aside any instances for pruning.

        PARAMETERS
        ----------
        trainSet : InstanceList
            Training data that the bootstrap samples are drawn from.
        parameters : BaggingParameter
            Parameters of the bagging algorithm; getEnsembleSize() gives the number of trees to build.
        """
        # The tree index is what gets passed to bootstrap() (presumably as the sampling seed - confirm).
        trees = [
            DecisionTree(
                DecisionNode(InstanceList(trainSet.bootstrap(treeIndex).getSample())))
            for treeIndex in range(parameters.getEnsembleSize())
        ]
        self.model = TreeEnsembleModel(trees)
Пример #5
0
 def test_Execute(self):
     """
     Check the mean error rates (in percent) of experiments executed by MxKFoldRun(5, 2).

     Each scenario pairs an expected percentage with a factory that builds the Experiment. The factory is invoked
     only when the experiment is about to run, so experiments are created and executed one at a time in the
     listed order. Values are compared to 2 decimal places.
     """
     foldRunner = MxKFoldRun(5, 2)
     scenarios = (
         (6.13, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris)),
         (23.51, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe)),
         (37.05, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.bupa)),
         (15.41, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.dermatology)),
         (34.72, lambda: Experiment(Lda(), Parameter(1), self.bupa)),
         (4.04, lambda: Experiment(Lda(), Parameter(1), self.dermatology)),
         (5.2, lambda: Experiment(LinearPerceptron(),
                                  LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                  self.iris)),
         (5.46, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.dermatology)),
         (16.52, lambda: Experiment(NaiveBayes(), Parameter(1), self.car)),
         (9.80, lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery)),
         (8.77, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe)),
         (9.77, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car)),
         (67.09, lambda: Experiment(Dummy(), Parameter(1), self.nursery)),
         (70.53, lambda: Experiment(Dummy(), Parameter(1), self.iris)),
     )
     for expectedErrorPercent, buildExperiment in scenarios:
         performance = foldRunner.execute(buildExperiment())
         self.assertAlmostEqual(
             expectedErrorPercent,
             100 * performance.meanPerformance().getErrorRate(), 2)
 def test_Execute(self):
     """
     Check the mean error rates (in percent) of experiments executed by KFoldRun(10).

     Each scenario pairs an expected percentage with a factory that builds the Experiment. The factory is invoked
     only when the experiment is about to run, so experiments are created and executed one at a time in the
     listed order. Values are compared to 2 decimal places.
     """
     foldRunner = KFoldRun(10)
     scenarios = (
         (6.00, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris)),
         (18.78, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe)),
         (36.85, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.bupa)),
         (10.92, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.dermatology)),
         (31.61, lambda: Experiment(Lda(), Parameter(1), self.bupa)),
         (3.30, lambda: Experiment(Lda(), Parameter(1), self.dermatology)),
         (5.33, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.iris)),
         (3.81, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.dermatology)),
         (14.88, lambda: Experiment(NaiveBayes(), Parameter(1), self.car)),
         (9.71, lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery)),
         (3.55, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe)),
         (6.77, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car)),
         (67.12, lambda: Experiment(Dummy(), Parameter(1), self.nursery)),
         (79.33, lambda: Experiment(Dummy(), Parameter(1), self.iris)),
     )
     for expectedErrorPercent, buildExperiment in scenarios:
         performance = foldRunner.execute(buildExperiment())
         self.assertAlmostEqual(
             expectedErrorPercent,
             100 * performance.meanPerformance().getErrorRate(), 2)
 def test_Execute(self):
     """
     Check the mean error rates (in percent) of experiments executed by StratifiedMxKFoldRun(5, 2).

     Each scenario pairs an expected percentage with a factory that builds the Experiment. The factory is invoked
     only when the experiment is about to run, so experiments are created and executed one at a time in the
     listed order. Values are compared to 2 decimal places.
     """
     stratifiedRunner = StratifiedMxKFoldRun(5, 2)
     scenarios = (
         (8.00, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris)),
         (22.03, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe)),
         (33.33, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.bupa)),
         (13.66, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.dermatology)),
         (33.05, lambda: Experiment(Lda(), Parameter(1), self.bupa)),
         (3.55, lambda: Experiment(Lda(), Parameter(1), self.dermatology)),
         (6.00, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.iris)),
         (4.66, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.dermatology)),
         (15.11, lambda: Experiment(NaiveBayes(), Parameter(1), self.car)),
         (9.68, lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery)),
         (9.29, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe)),
         (9.20, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car)),
         (66.67, lambda: Experiment(Dummy(), Parameter(1), self.nursery)),
         (66.67, lambda: Experiment(Dummy(), Parameter(1), self.iris)),
     )
     for expectedErrorPercent, buildExperiment in scenarios:
         performance = stratifiedRunner.execute(buildExperiment())
         self.assertAlmostEqual(
             expectedErrorPercent,
             100 * performance.meanPerformance().getErrorRate(), 2)
 def test_Execute(self):
     """
     Check the mean error rates (in percent) of experiments executed by BootstrapRun(10).

     Each scenario pairs an expected percentage with a factory that builds the Experiment. The factory is invoked
     only when the experiment is about to run, so experiments are created and executed one at a time in the
     listed order. Values are compared to 2 decimal places.
     """
     bootstrapRunner = BootstrapRun(10)
     scenarios = (
         (4.33, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.iris)),
         (13.16, lambda: Experiment(C45(), C45Parameter(1, True, 0.2), self.tictactoe)),
         (24.84, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                    self.bupa)),
         (8.01, lambda: Experiment(Knn(), KnnParameter(1, 3, EuclidianDistance()),
                                   self.dermatology)),
         (32.03, lambda: Experiment(Lda(), Parameter(1), self.bupa)),
         (2.95, lambda: Experiment(Lda(), Parameter(1), self.dermatology)),
         (3.27, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.iris)),
         (2.65, lambda: Experiment(LinearPerceptron(),
                                   LinearPerceptronParameter(1, 0.1, 0.99, 0.2, 100),
                                   self.dermatology)),
         (14.75, lambda: Experiment(NaiveBayes(), Parameter(1), self.car)),
         (9.71, lambda: Experiment(NaiveBayes(), Parameter(1), self.nursery)),
         (3.00, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.tictactoe)),
         (3.44, lambda: Experiment(Bagging(), BaggingParameter(1, 50), self.car)),
         (66.79, lambda: Experiment(Dummy(), Parameter(1), self.nursery)),
         (66.67, lambda: Experiment(Dummy(), Parameter(1), self.iris)),
     )
     for expectedErrorPercent, buildExperiment in scenarios:
         performance = bootstrapRunner.execute(buildExperiment())
         self.assertAlmostEqual(
             expectedErrorPercent,
             100 * performance.meanPerformance().getErrorRate(), 2)