# Example #1 (score: 0)
"""

# In[]
import HappyML.preprocessor as pp

dataset = pp.dataset("C:/Users/henry/Desktop/Python Training/Python機器學習/範例原始碼&「快樂版」函式庫/Ch05 Regression/50_Startups.csv")
X, Y = pp.decomposition(dataset, [0, 1, 2, 3], [4])

# X = pp.onehot_encoder(X, columns=[3])
# X = pp.remove_columns(X, [3])

X = pp.onehot_encoder(X, columns=[3], remove_trap=True)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y, train_size=0.8)

X_train, X_test= pp.feature_scaling(fit_ary=X_train, transform_arys=(X_train, X_test))
Y_train, Y_test= pp.feature_scaling(fit_ary=Y_train, transform_arys=(Y_train, Y_test))

# In[]
from HappyML.regression import SimpleRegressor

simple_reg = SimpleRegressor()
Y_pred_simple = simple_reg.fit(X_train, Y_train).predict(X_test)

# R-Squared always increase in multiple linear regression --> Use Adjusted R-Squared instead
print("Goodness of Model (R-Squared Score):", simple_reg.r_score(X_test, Y_test))

# In[]
from HappyML.regression import MultipleRegressor

X_train = pp.add_constant(X_train)
"""

# In[] Preprocessing
import HappyML.preprocessor as pp

# Load the mall-customer records from disk.
customers = pp.dataset(file="Mall_Customers.csv")

# Keep columns 1-4 as the feature matrix; no target column — this is
# an unsupervised (clustering) pipeline.
features = pp.decomposition(customers, x_columns=[1, 2, 3, 4])

# One-hot encode the categorical column 0, dropping one dummy column
# to avoid the dummy-variable trap.
features = pp.onehot_encoder(ary=features, columns=[0], remove_trap=True)

# Standardize every feature so the PCA step below is not dominated by
# columns with larger numeric ranges.
features = pp.feature_scaling(fit_ary=features, transform_arys=features)

# Dimensionality reduction via PCA; verbose/plot surface the explained
# variance so the number of retained components can be sanity-checked.
from HappyML.preprocessor import PCASelector

pca = PCASelector()
features = pca.fit(x_ary=features, verbose=True, plot=True).transform(x_ary=features)

# In[] K-Means Clustering with Fixed Clusters = 4 (Without HappyML)
# Kept for reference: the equivalent raw scikit-learn call.
# from sklearn.cluster import KMeans
# import time

# # K-Means Clustering with K=4
# kmeans = KMeans(n_clusters=4, init="k-means++", random_state=int(time.time()))
# Y_pred = kmeans.fit_predict(features)
# Example #3 (score: 0)
import HappyML.preprocessor as pp
from HappyML.classification import SVM
from HappyML.performance import KFoldClassificationPerformance
import numpy as np
from HappyML.performance import GridSearch

# SVM without GridSearch
# Load the voice dataset: columns 0-19 are acoustic features, column 20 is the label.
dataset = pp.dataset("Voice.csv")
X, Y = pp.decomposition(dataset, [i for i in range(20)], [20])
# Encode the string class labels as integers; keep the mapping for later decoding.
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

# Univariate feature selection; the two positional True flags are presumably
# verbose/plot switches of HappyML's fit() — TODO confirm against the HappyML API.
selector = pp.KBestSelector()
X = selector.fit(X, Y, True, True).transform(X)

X_train, X_test, Y_train, Y_test = pp.split_train_test(X, Y)
# Fit the scaler on the training split only, then transform both splits.
X_train, X_test = pp.feature_scaling(X_train, (X_train, X_test))

classifier = SVM()
Y_pred = classifier.fit(X_train, Y_train).predict(X_test)

# K-fold cross-validated metrics of the wrapped classifier on the full data.
K = 10
kfp = KFoldClassificationPerformance(X, Y, classifier.classifier, K)

print("----- SVM Classification -----")
print("{} Folds Mean Accuracy: {}".format(K, kfp.accuracy()))
print("{} Folds Mean Recall: {}".format(K, kfp.recall()))
print("{} Folds Mean Precision: {}".format(K, kfp.precision()))
print("{} Folds Mean F1-score: {}".format(K, kfp.f_score()))

# SVM with GridSearch
# Re-decompose from the raw dataset for the grid-search run
# (the rest of this example is truncated in this source).
X, Y = pp.decomposition(dataset, [i for i in range(20)], [20])
# Example #4 (score: 0)

import HappyML.preprocessor as pp
from HappyML.clustering import KMeansCluster
import HappyML.model_drawer as md
from HappyML.classification import DecisionTree
from HappyML.performance import KFoldClassificationPerformance
from IPython.display import Image, display

# Load the credit-card customer dataset.
dataset = pp.dataset("CreditCards.csv")

# Handle missing values with HappyML's default strategy.
dataset = pp.missing_data(dataset)

# Use every column except column 0 (presumably a customer ID — verify) as features.
X = pp.decomposition(dataset, [i for i in range(18) if i != 0])

# Standardize features before PCA.
X = pp.feature_scaling(X, X)

# Project onto the top 2 principal components so clusters can be drawn in 2-D.
selector = pp.PCASelector(best_k=2)
X = selector.fit(X).transform(X)

# Unsupervised segmentation: K-Means assigns each customer a cluster label.
cluster = KMeansCluster()
Y_pred = cluster.fit(X).predict(X, "Customer Type")

# Plot clusters and centroids; the font name supports CJK labels in the chart.
md.cluster_drawer(X, Y_pred, cluster.centroids, "Customers Segmentation",
                  "Microsoft JhengHei")

# Append the cluster labels so they can act as the supervised target below.
dataset = pp.combine(dataset, Y_pred)

# Re-decompose: original features vs. the newly added cluster label (column 18).
X, Y = pp.decomposition(dataset, [i for i in range(18) if i != 0], [18])

# Feature selection for the classification step
# (the rest of this example is truncated in this source).
selector = pp.KBestSelector()
# Example #5 (score: 0)
# In[] Import & Load data
import HappyML.preprocessor as pp

# Load the car-evaluation dataset.
dataset = pp.dataset(file="CarEvaluation.csv")

# In[] Decomposition
# Columns 0-3 are the features; column 4 is the class label.
X, Y = pp.decomposition(dataset,
                        x_columns=[i for i in range(4)],
                        y_columns=[4])

# In[] Missing Data
# NOTE(review): mean imputation runs before one-hot encoding, so it is only
# meaningful if the missing values occur in numeric columns — verify.
X = pp.missing_data(X, strategy="mean")

# In[] Categorical Data Encoding

# Label Encoding: map class names to integers; keep the mapping for decoding.
Y, Y_mapping = pp.label_encoder(Y, mapping=True)

# One-Hot Encoding of the categorical feature column 0.
X = pp.onehot_encoder(X, columns=[0])

# In[] Split Training Set, Testing Set
# random_state=0 makes the 80/20 split reproducible across runs.
X_train, X_test, Y_train, Y_test = pp.split_train_test(X,
                                                       Y,
                                                       train_size=0.8,
                                                       random_state=0)

# In[] Feature Scaling for X_train, X_test
# The scaler is fitted on X_train only, then applied to both splits.
X_train, X_test = pp.feature_scaling(X_train, transform_arys=(X_train, X_test))