Exemplo n.º 1
0
    def test_end_to_end(self):
        """Smoke-test the whole pipeline: preprocess, fit, predict, score.

        The first 300 rows of ``data/train.csv`` are used both to fit the
        classifier and to predict, and the test only requires the resulting
        F1 score to be strictly positive.
        """
        sample = pd.read_csv("data/train.csv")[:300]

        # Targets come from the 'label' column; every other column is a feature.
        targets = sample['label'].values
        features = np.array(sample.drop(columns='label'))

        classifier = RandomForestClassifier()
        classifier.fit(data_preprocess(features), targets)
        predicted = classifier.predict(data_preprocess(features))

        self.assertGreater(f1_score(targets, predicted), 0)
Exemplo n.º 2
0
                                                split_size=split_size,
                                                max_features=max_features)
                    RF.fit(X_train, Y_train)

                    acc.append(f1_score(y_test, RF.predict(x_test)))

                if np.mean(acc) > accuracy:
                    accuracy = np.mean(acc)
                    best_n_estimators = n_estimators
                    best_split_size = split_size
                    best_max_features = max_features

    print("[NUMBER OF ESTIMATORS]", best_n_estimators)
    print("[SPLIT SIZE]          ", best_split_size)
    print("[MAX FEATURES]        ", best_max_features)
    print("[ACCURACY]            ", accuracy)


# Script section: load the training CSV, split it into targets and features,
# preprocess the features and hand both to tune().
# NOTE(review): `model` is not used again in the visible code — confirm
# whether anything elsewhere in the file relies on it.
model = RandomForestClassifier()

# The CSV is decoded as latin1 rather than pandas' default encoding.
train_data = pd.read_csv("data/train.csv", encoding="latin1")
# NOTE(review): `df` is not referenced in the visible code, and read_csv
# already returns a DataFrame, so this wrap looks redundant — confirm
# before removing.
df = pd.DataFrame(train_data)

# Targets are the 'label' column; features are all remaining columns.
labels = train_data['label'].values
x_train = np.array(train_data.drop('label', axis=1))
y_train = labels

# Features are passed through data_preprocess before tuning; the targets are
# left untouched.
x_train = data_preprocess(x_train)

tune(x_train, y_train)
Exemplo n.º 3
0
 def test_data_preprocess(self):
     """Check that data_preprocess keeps the number of rows unchanged.

     A 5x3 array of ones is passed in; only the first dimension of the
     result is compared against the input.
     """
     sample = np.ones((5, 3))
     processed = data_preprocess(sample)
     self.assertEqual(sample.shape[0], processed.shape[0])