def train(self, data, labels):
    """Fit the ensemble by training one tree per bootstrap sample.

    ``data`` and ``labels`` are stored on the instance. Then, for each of
    the ``self.num_trees`` rounds, ``self.num_sample`` row indices are drawn
    with replacement, a new ``DecisionTree(self.max_depth, self.num_feature)``
    is trained on the selected rows and labels, and the tree is appended to
    ``self.trees``.
    """
    self.data, self.labels = data, labels

    for _ in range(self.num_trees):
        # Sampling is done with replacement, so a row may be picked
        # more than once within the same round.
        picked = np.random.choice(
            self.data.shape[0], self.num_sample, replace=True
        )
        bag_data, bag_labels = self.data[picked, :], self.labels[picked]

        member = DecisionTree(self.max_depth, self.num_feature)
        member.train(bag_data, bag_labels)
        self.trees.append(member)
Exemplo n.º 2
0
    def test_all_file(self):
        """Train on benchmark.csv and check that every row is reproduced.

        A tree is trained on the whole file with "Joga" as the label column.
        Each row, with its label removed, is then passed to the returned
        model and the prediction is compared with the row's own label.
        """
        label_name = "Joga"
        config = {
            'df': pd.read_csv("benchmark.csv", sep=';'),
            'label_column': label_name,
        }
        model = DecisionTree().train(config)

        for _, sample in config['df'].iterrows():
            expected = sample[label_name]
            features = sample.drop(label_name)
            self.assertEqual(expected, model.predict(features))
Exemplo n.º 3
0
    def test_benchmark(self):
        """Train on benchmark.csv and check one hand-written sample.

        A tree is trained on the whole file with "Joga" as the label column;
        the prediction for a single fixed feature vector must be 'Sim'.
        """
        config = {
            'df': pd.read_csv("benchmark.csv", sep=';'),
            'label_column': "Joga",
        }
        model = DecisionTree().train(config)

        # One unlabeled observation, keyed by feature column name.
        columns = ["Tempo", "Temperatura", "Umidade", "Ventoso"]
        values = ["Ensolarado", "Quente", "Normal", "Verdadeiro"]
        sample = pd.Series(values, index=columns, name="InferenceData")

        prediction = model.predict(sample)
        self.assertEqual(prediction, 'Sim')
Exemplo n.º 4
0
    def train(self, options):
        """
        Train a random forest made of ``n_trees`` decision trees.

        Keys read from ``options``:
            'n_trees': number of trees to train
            'df': pandas dataframe with the training data
            'bootstrap_size': size of each bootstrap; entries not used in
                the bootstrap will be ignored
            'label_column': label column to be predicted

        Each round asks ``get_bootstrap`` for a new sample of ``df``, trains
        a new ``DecisionTree`` on it and appends whatever that ``train`` call
        returns to ``self.ensemble``.

        Returns ``self``.
        """
        tree_count = options['n_trees']
        source_frame = options['df']
        sample_size = options['bootstrap_size']

        # A single options dict is reused for every tree; only its 'df'
        # entry is replaced from one round to the next.
        shared_options = dict(label_column=options['label_column'])

        for _ in range(tree_count):
            shared_options['df'] = get_bootstrap(source_frame, sample_size)
            self.ensemble.append(DecisionTree().train(shared_options))

        return self
Exemplo n.º 5
0
# Evaluation script: assemble the feature matrices, then score a single
# decision tree and a random forest on a held-out slice of the data.
# `cat_data`, `non_cat_data`, `non_cat_data_test` and `data` are defined
# earlier in the script (not shown here).

# Convert the categorical test features to a float array.
cat_data_test = np.array(cat_data_test, dtype='float')

# Join categorical and non-categorical features side by side (axis=1).

train_data = np.concatenate((cat_data, non_cat_data), axis=1)
# Labels come from the last column of `data`, cast to int.
train_label = data[:, -1].astype(int)
# The first 200 rows (and their labels) are used for validation.
validation_data = train_data[:200, :]
validation_label = train_label[:200]
# NOTE(review): these two full slices keep every row, so the 200 validation
# rows above are still part of the training set and the scores below are
# measured on data the models were trained on. Possibly `[200:]` was
# intended -- confirm before changing.
train_data = train_data[:, :]
train_label = train_label[:]
test_data = np.concatenate((cat_data_test, non_cat_data_test), axis=1)


# decision tree
# NOTE(review): the second argument is the number of rows
# (train_data.shape[0]); the meaning of the constructor arguments is not
# visible here -- verify against DecisionTree.__init__.
tree = DecisionTree(5, train_data.shape[0])
tree.train(train_data, train_label)
res = tree.predict(validation_data)
# Accuracy: fraction of validation predictions equal to the true label.
score = 0
for i in range(len(res)):
    if res[i] == validation_label[i]:
        score += 1
score /= len(res)
print(score)


# random forest

# NOTE(review): positional arguments are presumably
# (num_trees, max_depth, num_sample, num_feature) -- confirm against
# RandomForest.__init__.
forest = RandomForest(100,5,train_data.shape[0],6)
forest.train(train_data, train_label)
res = forest.predict(validation_data)