def wine_example():
    x, y = get_wine()
    x = x[(y == 0) | (y == 1)]
    y = y[(y == 0) | (y == 1)]

    _filter = Filter(FilterType.corr, 3)
    x_filter = _filter.fit_transform(x, y)
    print(x_filter.shape)

    _filter = Filter(FilterType.var, 3)
    x_filter = _filter.fit_transform(x, y)
    print(x_filter.shape)

    _filter = Filter(FilterType.entropy, 3)
    x_filter = _filter.fit_transform(x, y)
    print(x_filter.shape)

    embedded = Embedded(3, EmbeddedType.Lasso)
    x_embedded = embedded.fit_transform(x, y)
    print(x_embedded.shape)
    # After Lasso only two weights remain non-zero, so only two features are returned
    # The GBDT embedded method currently supports discrete features only
    embedded = Embedded(3, EmbeddedType.GBDT)
    x = np.random.choice([0, 1], 50).reshape(10, 5)
    y = np.random.rand(10)
    x_embedded = embedded.fit_transform(x, y)
    print(x_embedded, y)
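
# The following is a minimal, hypothetical numpy sketch of the idea behind
# filter-style selection (not the Filter class's implementation, whose exact
# criteria may differ): score each feature independently, e.g. by absolute
# correlation with the label or by variance, then keep the top_k
# highest-scoring columns.
import numpy as np

def select_top_k_by_corr(x, y, top_k):
    # absolute Pearson correlation of every column with the label
    scores = np.array([abs(np.corrcoef(x[:, j], y)[0, 1]) for j in range(x.shape[1])])
    keep = np.sort(np.argsort(scores)[::-1][:top_k])   # keep original column order
    return x[:, keep]

def select_top_k_by_var(x, top_k):
    # keep the top_k columns with the largest variance
    keep = np.sort(np.argsort(x.var(axis=0))[::-1][:top_k])
    return x[:, keep]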
def wine_example():
    x, y = get_wine()
    x = x[(y == 0) | (y == 1)]
    y = y[(y == 0) | (y == 1)]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # Bayes minimum-error-rate classifier
    bme = BayesMinimumError()
    bme.fit(x_train, y_train)
    print(bme.score(x_test, y_test))
    bme.classify_plot(x_test, y_test)

    # Bayes minimum-risk classifier: a risk (cost) matrix must be supplied.
    # The matrix [[0, 100], [10, 0]] means misclassifying a true 0 as 1 costs 100,
    # while misclassifying a true 1 as 0 costs 10.
    bmr = BayesMinimumRisk(np.array([[0, 100], [10, 0]]))
    bmr.fit(x_train, y_train)
    bmr.predict(x_test)
    print(bmr.score(x_test, y_test))
    bmr.classify_plot(x_test, y_test)

    # Naive Bayes
    nb = NaiveBayes()
    nb.fit(x_train, y_train)
    nb.predict(x_test)
    print(nb.score(x_test, y_test))
    nb.classify_plot(x_test, y_test)
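
# A minimal sketch of the minimum-risk decision rule itself (hypothetical
# standalone code, not BayesMinimumRisk's internals): given posterior
# probabilities P(class i | x) and a cost matrix where cost[i, j] is the loss
# of deciding class j when the true class is i, choose the decision with the
# smallest expected loss.
import numpy as np

def minimum_risk_decision(posteriors, cost):
    # posteriors: (n_samples, n_classes), each row summing to 1
    # expected_loss[n, j] = sum_i posteriors[n, i] * cost[i, j]
    expected_loss = posteriors @ cost
    return expected_loss.argmin(axis=1)

# With cost [[0, 100], [10, 0]] a sample is only assigned to class 1 when
# P(class 1 | x) is very high, because calling a true 0 a 1 is ten times
# more expensive than the reverse mistake:
# minimum_risk_decision(np.array([[0.2, 0.8], [0.05, 0.95]]),
#                       np.array([[0, 100], [10, 0]]))  ->  array([0, 1])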
def moon_example():
    """
    AdaBoost example, run on the wine dataset restricted to two classes
    :return:
    """
    x, y = get_wine()
    x = x[(y == 0) | (y == 1)]
    y = y[(y == 0) | (y == 1)]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # AdaBoost with logistic regression as the base classifier
    ada = AdaBoost(classifier=ClassifierType.LR)
    ada.fit(x_train, y_train)
    print(ada.score(x_test, y_test))
    ada.classify_plot(x_test, y_test, ", LR")

    # AdaBoost with KNN as the base classifier
    ada = AdaBoost(classifier=ClassifierType.KNN)
    ada.fit(x_train, y_train)
    print(ada.score(x_test, y_test))
    ada.classify_plot(x_test, y_test, ", KNN")

    # AdaBoost with a CART tree as the base classifier
    ada = AdaBoost(classifier=ClassifierType.CART)
    ada.fit(x_train, y_train)
    print(ada.score(x_test, y_test))
    ada.classify_plot(x_test, y_test, ", CART")
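
# A minimal sketch of a single AdaBoost round (hypothetical code, not the
# AdaBoost class's internals), assuming labels in {-1, +1}: compute the
# weighted error of the weak classifier, derive its voting weight alpha,
# then up-weight the samples it misclassified.
import numpy as np

def adaboost_round(sample_weights, y_true, y_pred):
    eps = 1e-10                                             # numerical guard
    err = np.sum(sample_weights * (y_pred != y_true)) / np.sum(sample_weights)
    alpha = 0.5 * np.log((1 - err + eps) / (err + eps))     # vote weight of this weak learner
    new_weights = sample_weights * np.exp(-alpha * y_true * y_pred)
    return alpha, new_weights / new_weights.sum()           # renormalized sample weights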
def watermelon_example():
    """
    GBDT example, using the watermelon dataset
    GBDT currently supports regression only, not classification
    :return:
    """
    x, y = get_watermelon()
    y = x[:, -1]      # use the last (continuous) column as the target
    x = x[:, :-1]     # keep the remaining discrete columns as features
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)
    gbdt = GBDT(learning_rate=1)
    gbdt.fit(x_train, y_train)
    print(gbdt.predict(x_test), y_test)
    print("R square: %.4f" % gbdt.score(x_test, y_test))

    # same setup on the wine data: last column as the continuous target
    x, y = get_wine()
    y = x[:, -1]
    x = x[:, :-1]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)
    gbdt = GBDT(learning_rate=1)
    gbdt.fit(x_train, y_train)
    print(gbdt.predict(x_test), y_test)
    print("R square: %.4f" % gbdt.score(x_test, y_test))
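
# A minimal sketch of the boosting idea behind GBDT regression with squared
# loss (hypothetical standalone code, not the GBDT class): each round fits a
# weak learner to the current residuals (the negative gradient of the squared
# loss) and adds its prediction scaled by the learning rate. The weak learner
# here is just a per-category mean over one discrete column, purely for
# illustration. r_square shows the usual R-squared formula that a score like
# the one printed above is expected to report.
import numpy as np

def gbdt_regression_sketch(x, y, column=0, n_rounds=10, learning_rate=1.0):
    pred = np.full(len(y), y.mean())                  # start from the mean prediction
    for _ in range(n_rounds):
        residual = y - pred                           # negative gradient of squared loss
        step = np.zeros(len(y))
        for value in np.unique(x[:, column]):
            mask = x[:, column] == value
            step[mask] = residual[mask].mean()        # per-category fit to the residuals
        pred += learning_rate * step
    return pred

def r_square(y_true, y_pred):
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - y_true.mean()) ** 2)
    return 1 - ss_res / ss_tot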
def random_forest_example():
    x, y = get_wine()
    # x = x[(y == 0) | (y == 1)]
    # y = y[(y == 0) | (y == 1)]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)
    rf = RandomForest(4, 50)
    rf.fit(x_train, y_train)
    print(rf.score(x_test, y_test))
    rf.classify_plot(x_test, y_test)
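
# A minimal sketch of the two sources of randomness in a random forest
# (hypothetical code, independent of the RandomForest class above): every tree
# is trained on a bootstrap sample of the rows and a random subset of the
# columns, and the forest classifies by majority vote over the trees.
import numpy as np

def bootstrap_and_feature_subset(x, y, n_features, rng):
    # rng is e.g. np.random.default_rng(918)
    rows = rng.choice(len(x), size=len(x), replace=True)            # bootstrap rows
    cols = rng.choice(x.shape[1], size=n_features, replace=False)   # random feature subset
    return x[np.ix_(rows, cols)], y[rows], cols

def majority_vote(tree_predictions):
    # tree_predictions: (n_trees, n_samples) array of integer class labels
    return np.apply_along_axis(lambda col: np.bincount(col).argmax(), 0, tree_predictions)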
def AdaBoost_multi_class_example():
    x, y = get_wine()
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    ada = AdaBoost(classifier=ClassifierType.LR)
    ada.fit(x_train, y_train)
    print(ada.predict(x_test))
    ada.classify_plot(x_test, y_test, ", LR")

    ada = AdaBoost(classifier=ClassifierType.CART)
    ada.fit(x_train, y_train)
    print(ada.predict(x_test))
    ada.classify_plot(x_test, y_test, ", CART")

    ada = AdaBoost(classifier=ClassifierType.KNN)
    ada.fit(x_train, y_train)
    print(ada.predict(x_test))
    ada.classify_plot(x_test, y_test, ", KNN")
def wine_example():
    x, y = get_wine()
    # x = x[(y == 0) | (y == 1)]
    # y = y[(y == 0) | (y == 1)]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # CART classification on the wine labels
    cart = CART()
    cart.fit(x_train, y_train)
    print(cart.score(x_test, y_test))
    cart.classify_plot(x_test, y_test)

    # CART regression: treat the last feature column as a continuous target
    y = x[:, -1]
    x = x[:, :-1]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)
    cart = CART()
    cart.fit(x_train, y_train)
    print(cart.score(x_test, y_test))
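
# A minimal sketch of the split criteria CART-style trees typically use
# (hypothetical code, not the CART class's internals): Gini impurity for
# classification and mean squared error for regression; a candidate split is
# scored by the size-weighted criterion of its two child nodes, and the tree
# greedily picks the split with the lowest score.
import numpy as np

def gini_impurity(labels):
    _, counts = np.unique(labels, return_counts=True)
    p = counts / counts.sum()
    return 1.0 - np.sum(p ** 2)

def mse_criterion(targets):
    return np.mean((targets - targets.mean()) ** 2)

def weighted_split_score(left, right, criterion):
    n = len(left) + len(right)
    return len(left) / n * criterion(left) + len(right) / n * criterion(right)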
def wine_example():
    x, y = get_wine()
    x = x[(y == 0) | (y == 1)]
    y = y[(y == 0) | (y == 1)]
    x_train, y_train, x_test, y_test = train_test_split(x, y, 0.3, 918)

    # logistic regression without an intercept term
    logistic = BaseLogisticRegression(has_intercept=False)
    logistic.fit(x_train, y_train)
    print(logistic.w)
    logistic.classify_plot(x_test, y_test)
    logistic.auc_plot(x_test, y_test)

    # Lasso (L1 regularization)
    lasso = Lasso()
    lasso.fit(x_train, y_train)
    print(lasso.w)
    lasso.classify_plot(x_test, y_test)

    # Ridge (L2 regularization)
    ridge = Ridge()
    ridge.fit(x_train, y_train)
    print(ridge.w)
    ridge.classify_plot(x_test, y_test)
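
# A minimal sketch of the model family used above (hypothetical code, not the
# library's implementation): logistic regression fits weights w by gradient
# descent on the log loss; an L1 penalty (Lasso) pushes some weights to exactly
# zero, while an L2 penalty (Ridge) shrinks them smoothly.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def logistic_gradient_step(w, x, y, lr=0.1, l1=0.0, l2=0.0):
    # y in {0, 1}; gradient of the average log loss with respect to w,
    # plus optional L1 / L2 penalty terms
    grad = x.T @ (sigmoid(x @ w) - y) / len(y)
    grad += l1 * np.sign(w) + l2 * w
    return w - lr * grad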