""" # In[] import HappyML.preprocessor as pp dataset = pp.dataset("C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/50_Startups.csv") X, Y = pp.decomposition(dataset, [0, 1, 2, 3], [4]) # X = pp.onehot_encoder(X, columns=[3]) # X = pp.remove_columns(X, [3]) X = pp.onehot_encoder(X, columns=[3], remove_trap=True) X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8) X_train, X_test= pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test)) Y_train, Y_test= pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test)) # In[] from HappyML.regression import SimpleRegressor simple_reg = SimpleRegressor() Y_pred_simple = simple_reg.fit(X_train, Y_train).predict(X_test) # R-Squared always increase in multiple linear regression --> Use Adjusted R-Squared instead print("Goodness of Model (R-Squared Score):", simple_reg.r_score(X_test, Y_test)) # In[] from HappyML.regression import MultipleRegressor X_train = pp.add_constant(X_train)
""" # In[] Preprocessing import HappyML.preprocessor as pp # Load Dataset dataset = pp.dataset(file="Mall_Customers.csv") # Decomposition X = pp.decomposition(dataset, x_columns=[1, 2, 3, 4]) # One-Hot Encoding X = pp.onehot_encoder(ary=X, columns=[0], remove_trap=True) # Feature Scaling (for PCA Feature Selection) X = pp.feature_scaling(fit_ary=X, transform_arys=X) # Feature Selection (PCA) from HappyML.preprocessor import PCASelector selector = PCASelector() X = selector.fit(x_ary=X, verbose=True, plot=True).transform(x_ary=X) # In[] K-Means Clustering with Fixed Clusters = 4 (Without HappyML) # from sklearn.cluster import KMeans # import time # # K-Means Clustering with K=4 # kmeans = KMeans(n_clusters=4, init="k-means++", random_state=int(time.time())) # Y_pred = kmeans.fit_predict(X)
"""SVM classification of the Voice dataset with K-fold evaluation (HappyML).

NOTE(review): the original script was collapsed onto one line (invalid
syntax); reconstructed as a formatted script with identical statements.
"""

import HappyML.preprocessor as pp
from HappyML.classification import SVM
from HappyML.performance import KFoldClassificationPerformance
import numpy as np
from HappyML.performance import GridSearch

# SVM without GridSearch
dataset = pp.dataset("Voice.csv")

# Columns 0-19 are acoustic features; column 20 is the label.
X, Y = pp.decomposition(dataset, list(range(20)), [20])
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

# Univariate (K-best) feature selection before training.
selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

classifier = SVM()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold cross-validated performance on the full (selected) feature set.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- SVM Classification -----")
print("{} Folds Mean Accuracy: {}".format(K, kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, kfp.precision()))
print("{} Folds Mean F1-score: {}".format(K, kfp.f_score()))

# SVM with GridSearch — restart from the raw columns for a fair comparison.
X, Y = pp.decomposition(dataset, list(range(20)), [20])
""" import HappyML.preprocessor as pp from HappyML.clustering import KMeansCluster import HappyML.model_drawer as md from HappyML.classification import DecisionTree from HappyML.performance import KFoldClassificationPerformance from IPython.display import Image, display dataset = pp.dataset("CreditCards.csv") dataset = pp.missing_data(dataset) X = pp.decomposition(dataset, [i for i in range(18) if i != 0]) X = pp.feature_scaling(X, X) selector = pp.PCASelector(best_k=2) X = selector.fit(X).transform(X) cluster = KMeansCluster() Y_pred = cluster.fit(X).predict(X, "Customer Type") md.cluster_drawer(X, Y_pred, cluster.centroids, "Customers Segmentation", "Microsoft JhengHei") dataset = pp.combine(dataset, Y_pred) X, Y = pp.decomposition(dataset, [i for i in range(18) if i != 0], [18]) selector = pp.KBestSelector()
"""Preprocessing pipeline for the CarEvaluation dataset (HappyML).

NOTE(review): the original script was collapsed onto one line (invalid
syntax); reconstructed as a formatted script with identical statements.
"""

# In[] Import & Load data
import HappyML.preprocessor as pp

dataset = pp.dataset(file="CarEvaluation.csv")

# In[] Decomposition — columns 0-3 are features, column 4 is the label.
X, Y = pp.decomposition(dataset, x_columns=[i for i in range(4)], y_columns=[4])

# In[] Missing Data — impute with the column mean.
X = pp.missing_data(X, strategy="mean")

# In[] Categorical Data Encoding
# Label Encoding (keep the mapping for decoding predictions later)
Y, Y_mapping = pp.label_encoder(Y, mapping=True)
# One-Hot Encoding
X = pp.onehot_encoder(X, columns=[0])

# In[] Split Training Set, Testing Set
X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8, random_state=0)

# In[] Feature Scaling for X_train, X_test (fit on the training split only)
X_train, X_test = pp.feature_scaling(X_train, transform_arys=(X_train, X_test))