def train(self):
    """
    Learns a decision tree and saves the model to a file.

    Reads the training examples from ``self.data["train"]``, builds a tree
    with ``d_tree`` over the feature names of the first example, stores it
    in ``self.tree`` and pickles this whole object to ``self.out_file``.

    Raises IndexError if the training set is empty (the feature names are
    taken from the first example).
    """
    examples = self.data["train"]
    # Every example is assumed to share the first example's feature keys.
    features = set(examples[0].features.keys())
    # NOTE(review): the trailing 7 is presumably the maximum tree depth and
    # the empty list the initial parent examples -- confirm against d_tree.
    self.tree = d_tree(examples, features, [], 7)
    # The context manager closes the file even if pickling fails part-way.
    with open(self.out_file, "wb") as f:
        pickle.dump(self, f)
def train(self, ensemble_size=5):
    """
    Learns an ensemble using adaboost and saves the model to a file.

    For each of ``ensemble_size`` rounds a stump is built with ``d_tree``,
    its weighted error over the training examples is measured, the weights
    of the correctly classified examples are scaled down by
    ``error / (total - error)``, and the sample is re-normalized. Each stump
    gets the vote weight ``log((total - error) / error)`` and is appended to
    ``self.ensemble``. Finally this whole object is pickled to
    ``self.out_file``.

    Raises IndexError if the training set is empty (the feature names are
    taken from the first example).
    """
    examples = self.data["train"]
    # Every example is assumed to share the first example's feature keys.
    features = set(examples[0].features.keys())
    sample = WeightedSample(examples)
    self.ensemble = []
    for _ in range(ensemble_size):
        # NOTE(review): the trailing 1 is presumably a depth limit (a stump);
        # whether d_tree takes the example weights into account is not
        # visible here -- confirm against d_tree.
        stump = d_tree(examples, features, [], 1)

        # Classify each example once and reuse the result for both the
        # error computation and the weight update.
        correct = [stump.decide(ex) == ex.goal for ex in examples]
        error = sum(ex.weight for ex, ok in zip(examples, correct) if not ok)

        # Capture the total weight on the same scale the error was measured
        # on, before any weight is changed or the sample is re-normalized.
        total = sample.dist_sum

        # Keep the error strictly inside (0, total): a perfect (or totally
        # wrong) stump would otherwise divide by zero below.
        eps = total * 1e-10
        error = min(max(error, eps), total - eps)

        # Down-weight the examples this stump already gets right so the next
        # round concentrates on the ones it misclassified.
        factor = error / (total - error)
        for j, (ex, ok) in enumerate(zip(examples, correct)):
            if ok:
                sample.change_weight(j, ex.weight * factor)
        sample.normalize()

        # Vote weight is the log of the odds of being right; the division
        # must happen inside the logarithm.
        stump.weight = math.log((total - error) / error)
        self.ensemble.append(stump)

    # The context manager closes the file even if pickling fails part-way.
    with open(self.out_file, "wb") as f:
        pickle.dump(self, f)