Пример #1
0
    def train(self):
        """
        Learn a decision tree from the training examples and pickle the model.

        Reads the examples from ``self.data["train"]``, takes the feature
        names from the first example's ``features`` mapping, stores the
        learned tree on ``self.tree`` and finally pickles this whole object
        to ``self.out_file``.

        Fails on the ``examples[0]`` lookup if there are no training
        examples, because the feature set is read from the first one.
        """
        examples = self.data["train"]
        features = set(examples[0].features.keys())

        # NOTE(review): the [] and the 7 are handed straight to d_tree; the 7
        # is presumably a depth limit -- confirm against d_tree's signature.
        self.tree = d_tree(examples, features, [], 7)

        # The context manager closes the file even if pickling raises.
        with open(self.out_file, "wb") as f:
            pickle.dump(self, f)
Пример #2
0
    def train(self, ensemble_size=5):
        """
        Learn an ensemble of decision stumps with AdaBoost and pickle the model.

        Each round fits a stump, sums the weight of the examples it
        misclassifies, scales down the weight of the examples it got right,
        renormalizes the sample, and records the stump together with its
        voting weight in ``self.ensemble``. When all rounds are done this
        whole object is pickled to ``self.out_file``.

        Args:
            ensemble_size: number of boosting rounds, i.e. how many stumps
                end up in ``self.ensemble``.

        Fails on the ``examples[0]`` lookup if there are no training
        examples, because the feature set is read from the first one.
        """
        examples = self.data["train"]
        features = set(examples[0].features.keys())
        sample = WeightedSample(examples)
        self.ensemble = []

        for _ in range(ensemble_size):
            # NOTE(review): the trailing 1 is presumably the depth limit that
            # makes this a stump -- confirm against d_tree's signature.
            stump = d_tree(examples, features, [], 1)

            # Classify every example once; the outcome feeds both the error
            # sum and the reweighting pass below.
            correct = [stump.decide(ex) == ex.goal for ex in examples]

            # Capture the total before any weight is touched so the error,
            # the update factor and the stump's vote all refer to the same
            # distribution. Assumes dist_sum is the current sum of the
            # example weights -- TODO confirm against WeightedSample.
            total = sample.dist_sum
            error = sum(
                ex.weight for ex, ok in zip(examples, correct) if not ok
            )

            # A stump that is right (or wrong) on everything would otherwise
            # divide by zero below; keep the error strictly inside (0, total).
            margin = total / (2 * len(examples))
            error = min(max(error, margin), total - margin)
            correct_mass = total - error

            # Shrink the weight of correctly classified examples by the same
            # constant factor, so the next stump focuses on the mistakes.
            for j, (ex, ok) in enumerate(zip(examples, correct)):
                if ok:
                    sample.change_weight(j, ex.weight * error / correct_mass)

            sample.normalize()

            # The vote is the log of the odds of being right: the ratio has
            # to be taken inside the logarithm, not applied to its result.
            stump.weight = math.log(correct_mass / error)
            self.ensemble.append(stump)

        # The context manager closes the file even if pickling raises.
        with open(self.out_file, "wb") as f:
            pickle.dump(self, f)