def fit(self, x):
    """Fit the mixture of discrete naive-Bayes components to ``x`` with EM.

    ``x`` is first discretised by ``self.dbw`` (fit + transform); the model
    then alternates between re-estimating the log-priors ``self.p_y`` and the
    per-feature log-conditionals ``self.p_x_y`` (M-step) and recomputing the
    responsibilities through ``self.get_log_w`` (E-step).  Training stops
    after ``self.n_iter`` rounds, or as soon as the log-likelihood improves by
    no more than ``self.tol``.

    Args:
        x: Training samples; ``x.shape[0]`` is used as the sample count.
           NOTE(review): presumably a 2-D array of shape
           (n_samples, n_features) - confirm against callers.

    Side effects:
        Overwrites ``self.default_y_prob`` and fills ``self.p_y``,
        ``self.p_x_y`` and ``self.default_x_prob`` (these containers are
        assumed to exist already).  Prints the number of completed epochs
        when the tolerance criterion stops training, and draws the decision
        function when ``self.verbose`` is truthy.
    """
    n_sample = x.shape[0]
    self.dbw.fit(x)
    x_bins = self.dbw.transform(x)
    n_feature = x_bins.shape[1]

    # The distinct bin values of each feature never change during training,
    # so build the sets once instead of once per component and iteration.
    bin_values = [set(x_bins[:, j]) for j in range(n_feature)]

    def _e_step():
        """Return (responsibilities, total log-likelihood) for current parameters."""
        w_log = self.get_log_w(x_bins)
        # log(sum(exp(w_log))) evaluated with logaddexp so that very negative
        # log-probabilities do not underflow to log(0) = -inf.
        log_likelihood = np.logaddexp.reduce(np.asarray(w_log), axis=1).sum()
        return utils.softmax(w_log), log_likelihood

    # ---- initialise the model parameters ----
    self.default_y_prob = np.log(0.5 / self.n_components)  # fallback log p(y)
    for y_label in range(self.n_components):
        self.p_x_y[y_label] = {}
        self.p_y[y_label] = np.log(1.0 / self.n_components)  # uniform prior
        self.default_x_prob[y_label] = np.log(0.5 / n_sample)  # fallback log p(x|y)
        for j in range(n_feature):
            self.p_x_y[y_label][j] = {}
            n_values = len(bin_values[j])
            for x_j in bin_values[j]:
                # Estimate the conditional probability from a random
                # subsample (drawn with replacement), with add-one smoothing.
                # NOTE(review): presumably the subsampling is what makes the
                # components start from different parameters - confirm.
                sample_x_index = np.random.choice(
                    n_sample, n_sample // self.n_components)
                sample_x_bins = x_bins[sample_x_index]
                p_x_y = (np.sum(sample_x_bins[:, j] == x_j) + 1) / (
                    sample_x_bins.shape[0] + n_values)
                self.p_x_y[y_label][j][x_j] = np.log(p_x_y)

    # ---- initial latent-variable estimate ----
    W, current_log_loss = _e_step()

    # ---- EM iterations ----
    current_epoch = 0
    for _ in range(self.n_iter):
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.pause(0.1)
            utils.plt.clf()

        # M-step: update priors and conditionals from the responsibilities.
        for k in range(self.n_components):
            w_k = W[:, k]
            w_k_total = np.sum(w_k)
            self.p_y[k] = np.log(w_k_total / n_sample)
            for j in range(n_feature):
                column = x_bins[:, j]
                for x_j in bin_values[j]:
                    # 1e-10 keeps the logarithm finite when a value gets no weight.
                    self.p_x_y[k][j][x_j] = np.log(
                        1e-10 + np.sum(w_k * (column == x_j)) / w_k_total)

        # E-step: refresh the responsibilities and the log-likelihood.
        W, new_log_loss = _e_step()

        # Despite the name, this is a log-likelihood: keep going only while
        # it still increases by more than the tolerance.
        if new_log_loss - current_log_loss > self.tol:
            current_log_loss = new_log_loss
            current_epoch += 1
        else:
            print('total epochs:', current_epoch)
            break

    if self.verbose:
        utils.plot_decision_function(x, self.predict(x), self)
        utils.plt.show()
def fit(self, x):
    """Fit the mixture of Gaussian naive-Bayes components to ``x`` with EM.

    Every component ``k`` keeps a prior ``self.p_y[k]`` and, per feature
    ``j``, a ``[mean, std]`` pair in ``self.p_x_y[k][j]``.  The method
    alternates between re-estimating these parameters (M-step) and
    recomputing the per-sample component weights through ``self.get_w``
    (E-step).  Training stops after ``self.n_iter`` rounds, or as soon as the
    log-likelihood improves by no more than ``self.tol``.

    Args:
        x: Training samples, indexed as ``x[:, j]`` per feature, so a 2-D
           array of shape (n_samples, n_features).

    Side effects:
        Overwrites ``self.default_y_prob`` and fills ``self.p_y`` and
        ``self.p_x_y`` (these containers are assumed to exist already).
        Prints the number of completed epochs when the tolerance criterion
        stops training, and draws the decision function when
        ``self.verbose`` is truthy.
    """
    n_sample, n_feature = x.shape[0], x.shape[1]

    def _e_step():
        """Return (row-normalised weights, total log-likelihood)."""
        w = self.get_w(x)
        log_likelihood = np.log(w.sum(axis=1)).sum()
        return w / np.sum(w, axis=1, keepdims=True), log_likelihood

    # ---- initialise the model parameters ----
    self.default_y_prob = np.log(0.5 / self.n_components)  # fallback log p(y)
    for y_label in range(self.n_components):
        self.p_x_y[y_label] = {}
        self.p_y[y_label] = 1.0 / self.n_components  # uniform prior
        for j in range(n_feature):
            x_j = x[:, j]
            # Feature mean shifted by a random fraction of (max + min) / 2,
            # so each component gets its own starting mean.
            u = np.mean(x_j, axis=0) + np.random.random() * (
                x_j.max() + x_j.min()) / 2
            sigma = np.std(x_j)
            self.p_x_y[y_label][j] = [u, sigma]

    # ---- initial latent-variable estimate ----
    W, current_log_loss = _e_step()

    # ---- EM iterations ----
    current_epoch = 0
    for _ in range(self.n_iter):
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.pause(0.1)
            utils.plt.clf()

        # M-step: weighted prior, mean and standard deviation per component.
        for k in range(self.n_components):
            w_k = W[:, k]
            w_k_total = np.sum(w_k)  # shared by the prior, mean and std below
            self.p_y[k] = w_k_total / n_sample
            for j in range(n_feature):
                x_j = x[:, j]
                u = np.sum(x_j * w_k) / w_k_total
                sigma = np.sqrt(
                    np.sum((x_j - u) * (x_j - u) * w_k) / w_k_total)
                self.p_x_y[k][j] = [u, sigma]

        # E-step: refresh the weights and the log-likelihood.
        W, new_log_loss = _e_step()

        # Despite the name, this is a log-likelihood: keep going only while
        # it still increases by more than the tolerance.
        if new_log_loss - current_log_loss > self.tol:
            current_log_loss = new_log_loss
            current_epoch += 1
        else:
            print('total epochs:', current_epoch)
            break

    if self.verbose:
        utils.plot_decision_function(x, self.predict(x), self)
        utils.plt.show()
# Demo: train a soft-margin SVM on a well-separated synthetic 2-D data set,
# report its training-set scores and plot the decision function together
# with the support vectors.
import os

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score

# The working directory is changed before the project package is imported,
# exactly as in the original ordering.
# NOTE(review): presumably this is what makes `ml_models` importable when the
# script runs from a sub-directory - confirm.
os.chdir('../')

from ml_models import utils
from ml_models.svm import HardMarginSVM, SoftMarginSVM

data, target = make_classification(n_samples=100, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1,
                                   class_sep=3.0)

svm = SoftMarginSVM()
svm.fit(data, target, show_train_process=True)

# F1 score and summed absolute label difference on the training data;
# predict once and reuse the result for both figures.
prediction = svm.predict(data)
print(f1_score(target, prediction))
print(np.sum(np.abs(target - prediction)))

utils.plt.close()
utils.plot_decision_function(data, target, svm, svm.support_vectors)
utils.plt.show()
print('support vector', svm.support_vectors)
# Demo: bag three CART classifiers on a weakly separated synthetic 2-D data
# set, score the ensemble on a 10 % hold-out split and plot the decision
# function over the training data.
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score

from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTClassifier
from ml_models.svm import SVC
from ml_models.ensemble import BaggingClassifier

data, target = make_classification(n_samples=100, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1,
                                   class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

classifier = BaggingClassifier(base_estimator=CARTClassifier(), n_estimators=3)
classifier.fit(X_train, y_train)

# F1 score and summed absolute label difference on the hold-out split;
# predict once and reuse the result for both figures.
prediction = classifier.predict(X_test)
print(f1_score(y_test, prediction))
print(np.sum(np.abs(y_test - prediction)))

utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()
# Demo: learning vector quantisation on four Gaussian blobs that are merged
# into two classes, followed by a plot of the resulting decision function.
import numpy as np
# `sklearn.datasets.samples_generator` is no longer available in current
# scikit-learn; `make_blobs` is exported by `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

from ml_models.cluster import LVQ
from ml_models import utils

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]  # swap the two feature columns

# Merge blob 2 into class 0 ...
y = np.where(y == 2, 0, y)
# ... and blob 3 into class 1.
y = np.where(y == 3, 1, y)

# One class label per prototype vector: two prototypes for each class.
lvq = LVQ(class_label=[0, 0, 1, 1])
lvq.fit(X, y)

utils.plot_decision_function(X, y, lvq)
utils.plt.show()
# Demo: fit and prune a decision tree on a weakly separated synthetic 2-D
# data set, report its training-set scores and plot the decision function.
import os

import numpy as np
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score

# The working directory is changed before the project package is imported,
# exactly as in the original ordering.
# NOTE(review): presumably this is what makes `ml_models` importable when the
# script runs from a sub-directory - confirm.
os.chdir('../')

from ml_models import utils
from ml_models.tree import DecisionTreeClassifier

data, target = make_classification(n_samples=100, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1,
                                   class_sep=.3)

tree = DecisionTreeClassifier(max_bins=20)
tree.fit(data, target)
tree.prune(alpha=1)

# F1 score and summed absolute label difference on the training data;
# predict once and reuse the result for both figures.
prediction = tree.predict(data)
print(f1_score(target, prediction))
print(np.sum(np.abs(target - prediction)))

utils.plot_decision_function(data, target, tree)
utils.plt.show()
# Demo: fit and prune a CART classifier on a reproducible synthetic 2-D data
# set, score it on a 10 % hold-out split and plot the decision function over
# the training data.
import os

import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score

# Change directory first; the project imports stay after this call.
os.chdir('../')

from ml_models import utils
from ml_models.tree import CARTClassifier

dataset_options = dict(
    n_samples=100,
    n_features=2,
    n_classes=2,
    n_informative=1,
    n_redundant=0,
    n_repeated=0,
    n_clusters_per_class=1,
    class_sep=.3,
    random_state=44,
)
data, target = make_classification(**dataset_options)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data,
    target,
    test_size=0.1,
    random_state=0,
)

tree = CARTClassifier()
tree.fit(X_train, y_train)
tree.prune(5)

# Hold-out F1 score, then the summed absolute label difference.
holdout_f1 = f1_score(y_test, tree.predict(X_test))
print(holdout_f1)
holdout_abs_diff = np.sum(np.abs(y_test - tree.predict(X_test)))
print(holdout_abs_diff)

utils.plot_decision_function(X_train, y_train, tree)
utils.plt.show()
# Demo: agglomerative clustering (AGNES) with k=4 on four Gaussian blobs,
# followed by a plot of the resulting decision function.
# `sklearn.datasets.samples_generator` is no longer available in current
# scikit-learn; `make_blobs` is exported by `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

from ml_models.cluster import AGNES
from ml_models import utils

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]  # swap the two feature columns

agnes = AGNES(k=4)
agnes.fit(X)

utils.plot_decision_function(X, y, agnes)
utils.plt.show()
# Demo: semi-naive Gaussian Bayes classifier with one feature link (0, 1) on
# four Gaussian blobs; prints the shape of the predicted probabilities and
# plots the decision function.
# `sklearn.datasets.samples_generator` is no longer available in current
# scikit-learn; `make_blobs` is exported by `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

from ml_models import utils
from ml_models.pgm import SemiGaussianNBClassifier

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]  # swap the two feature columns

nb = SemiGaussianNBClassifier(link_rulers=[(0, 1)])
nb.fit(X, y)
print(nb.predict_proba(X).shape)

utils.plot_decision_function(X, y, nb)
utils.plt.show()
# Demo: Gaussian-mixture classifier (100 EM iterations) on four Gaussian
# blobs; prints the predicted labels and plots the decision function.
# `sklearn.datasets.samples_generator` is no longer available in current
# scikit-learn; `make_blobs` is exported by `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

from ml_models import utils
from ml_models.em import GMMClassifier

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]  # swap the two feature columns

gmm = GMMClassifier(n_iter=100)
gmm.fit(X, y)
print(gmm.predict(X))

utils.plot_decision_function(X, y, gmm)
utils.plt.show()
# Demo for the FFM model: a (disabled) binary-classification run, followed by
# a multi-class run through a one-vs-one wrapper.
from ml_models import utils
from sklearn.datasets import make_classification
# `sklearn.datasets.samples_generator` is no longer available in current
# scikit-learn; `make_blobs` is exported by `sklearn.datasets` directly.
from sklearn.datasets import make_blobs

# ---- Binary classification ----
data, target = make_classification(n_samples=200, n_features=2, n_classes=2,
                                   n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1)
# Disabled example: fit a single FFM directly on the binary problem.
# ffm = FFM(batch_size=1, epochs=20, solver='adam', objective='logistic')
# ffm.fit(data, target, show_log=True)
# utils.plot_decision_function(data, target, ffm)
# utils.plt.show()

# ---- Multi-class classification ----
X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]  # swap the two feature columns

# Kept as a wildcard import, at its original position, so that every name it
# used to provide is still in scope.
# NOTE(review): `FFM` is not imported anywhere in this snippet; it must come
# from this wildcard import or from a definition earlier in the file - confirm,
# then replace the wildcard with explicit names.
from ml_models.wrapper_models import *

ffm = FFM(epochs=10, solver='adam', objective='logistic')
ovo = MultiClassWrapper(ffm, mode='ovo')
ovo.fit(X, y)

utils.plot_decision_function(X, y, ovo)
utils.plt.show()
# Demo: RBF-kernel SVC trained with per-sample weights on a heavily
# imbalanced synthetic 2-D data set, followed by a plot of the decision
# function.
import numpy as np
from sklearn.datasets import make_classification, make_moons

from ml_models import utils
from ml_models.svm import SVC

dataset_options = dict(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.05, 0.95],
    class_sep=3,
    flip_y=0.05,
    random_state=0,
)
X, y = make_classification(**dataset_options)
# Alternative data set:
# X, y = make_moons(noise=0.01)

# Samples labelled 0 get weight 50, all others weight 1.
weights = np.where(y == 0, 50, 1)

svc_with_sample_weight = SVC(kernel='rbf', gamma=2.0)
svc_with_sample_weight.fit(
    X,
    y,
    sample_weight=weights,
    show_train_process=True,
)

utils.plot_decision_function(X=X, y=y, clf=svc_with_sample_weight)