Example #1
    def fit(self, x):
        n_sample = x.shape[0]
        self.dbw.fit(x)
        x_bins = self.dbw.transform(x)
        # Initialize model parameters
        self.default_y_prob = np.log(0.5 / self.n_components)  # default p_y
        for y_label in range(0, self.n_components):
            self.p_x_y[y_label] = {}
            self.p_y[y_label] = np.log(1.0 / self.n_components)  # initialize p_y uniformly
            self.default_x_prob[y_label] = np.log(0.5 / n_sample)  # default p_x_y
            # initialize p_x_y uniformly
            for j in range(0, x_bins.shape[1]):
                self.p_x_y[y_label][j] = {}
                x_j_set = set(x_bins[:, j])
                for x_j in x_j_set:
                    # Estimate the conditional probability from a random subsample (Laplace-smoothed)
                    sample_x_index = np.random.choice(
                        n_sample, n_sample // self.n_components)
                    sample_x_bins = x_bins[sample_x_index]
                    p_x_y = (np.sum(sample_x_bins[:, j] == x_j) +
                             1) / (sample_x_bins.shape[0] + len(x_j_set))
                    self.p_x_y[y_label][j][x_j] = np.log(p_x_y)
        # Compute the latent variables (E-step)
        W_log = self.get_log_w(x_bins)
        W = utils.softmax(W_log)
        W_gen = np.exp(W_log)
        current_log_loss = np.log(W_gen.sum(axis=1)).sum()
        # Iterative training (EM)
        current_epoch = 0
        for _ in range(0, self.n_iter):
            if self.verbose:
                utils.plot_decision_function(x, self.predict(x), self)
                utils.plt.pause(0.1)
                utils.plt.clf()
            # Update model parameters (M-step)
            for k in range(0, self.n_components):
                self.p_y[k] = np.log(np.sum(W[:, k]) / n_sample)
                for j in range(0, x_bins.shape[1]):
                    x_j_set = set(x_bins[:, j])
                    for x_j in x_j_set:
                        self.p_x_y[k][j][x_j] = np.log(
                            1e-10 +
                            np.sum(W[:, k] *
                                   (x_bins[:, j] == x_j)) / np.sum(W[:, k]))

            # Update the latent variables (E-step)
            W_log = self.get_log_w(x_bins)
            W = utils.softmax(W_log)
            W_gen = np.exp(W_log)
            # Compute the log-likelihood
            new_log_loss = np.log(W_gen.sum(axis=1)).sum()
            if new_log_loss - current_log_loss > self.tol:
                current_log_loss = new_log_loss
                current_epoch += 1
            else:
                print('total epochs:', current_epoch)
                break
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.show()
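
Note: the fit method above relies on a get_log_w helper that this listing does not show. A minimal sketch of what it plausibly computes for the discretized model in Example #1 (the fallback to default_x_prob for unseen bins is an assumption, not the repository's actual code):

    def get_log_w(self, x_bins):
        # Joint log-probability log p(y=k) + sum_j log p(x_j | y=k) per sample;
        # bins never seen during fitting fall back to the default log-probability
        n_sample = x_bins.shape[0]
        W_log = np.zeros((n_sample, self.n_components))
        for k in range(0, self.n_components):
            W_log[:, k] = self.p_y[k]
            for j in range(0, x_bins.shape[1]):
                W_log[:, k] += np.asarray([
                    self.p_x_y[k][j].get(x_j, self.default_x_prob[k])
                    for x_j in x_bins[:, j]
                ])
        return W_log
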
Example #2
    def fit(self, x):
        n_sample = x.shape[0]

        # Initialize model parameters
        self.default_y_prob = np.log(0.5 / self.n_components)  # default p_y
        for y_label in range(0, self.n_components):
            self.p_x_y[y_label] = {}
            self.p_y[y_label] = 1.0 / self.n_components  # initialize p_y uniformly
            # initialize each component's Gaussian parameters
            for j in range(0, x.shape[1]):
                # randomly perturb the mean so the components start apart
                u = np.mean(x[:, j]) + np.random.random() * (
                    x[:, j].max() + x[:, j].min()) / 2
                sigma = np.std(x[:, j])
                self.p_x_y[y_label][j] = [u, sigma]

        # Compute the latent variables (E-step)
        W = self.get_w(x)
        current_log_loss = np.log(W.sum(axis=1)).sum()
        W = W / np.sum(W, axis=1, keepdims=True)
        # Iterative training (EM)
        current_epoch = 0
        for _ in range(0, self.n_iter):
            if self.verbose:
                utils.plot_decision_function(x, self.predict(x), self)
                utils.plt.pause(0.1)
                utils.plt.clf()
            # Update model parameters (M-step)
            for k in range(0, self.n_components):
                self.p_y[k] = np.sum(W[:, k]) / n_sample
                for j in range(0, x.shape[1]):
                    x_j = x[:, j]
                    u = np.sum(x_j * W[:, k]) / np.sum(W[:, k])
                    sigma = np.sqrt(
                        np.sum(
                            (x_j - u) * (x_j - u) * W[:, k]) / np.sum(W[:, k]))
                    self.p_x_y[k][j] = [u, sigma]

            # Update the latent variables (E-step)
            W = self.get_w(x)
            new_log_loss = np.log(W.sum(axis=1)).sum()
            W = W / np.sum(W, axis=1, keepdims=True)
            if new_log_loss - current_log_loss > self.tol:
                current_log_loss = new_log_loss
                current_epoch += 1
            else:
                print('total epochs:', current_epoch)
                break
        if self.verbose:
            utils.plot_decision_function(x, self.predict(x), self)
            utils.plt.show()
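
The Gaussian variant calls a get_w helper instead. A sketch of what it likely returns, namely the unnormalized responsibilities p(y=k) * prod_j N(x_j; u_kj, sigma_kj) (an assumption consistent with how W is normalized and logged above, not necessarily the exact ml_models code):

    def get_w(self, x):
        # Unnormalized responsibilities: prior times per-feature Gaussian densities
        n_sample = x.shape[0]
        W = np.zeros((n_sample, self.n_components))
        for k in range(0, self.n_components):
            W[:, k] = self.p_y[k]
            for j in range(0, x.shape[1]):
                u, sigma = self.p_x_y[k][j]
                W[:, k] *= np.exp(-(x[:, j] - u) ** 2 / (2 * sigma ** 2)) \
                           / (np.sqrt(2 * np.pi) * sigma)
        return W
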
Example #3
import os
import numpy as np

os.chdir('../')

from sklearn.datasets import make_classification
from ml_models import utils

data, target = make_classification(n_samples=100,
                                   n_features=2,
                                   n_classes=2,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_clusters_per_class=1,
                                   class_sep=3.0)

from ml_models.svm import HardMarginSVM, SoftMarginSVM

svm = SoftMarginSVM()
svm.fit(data, target, show_train_process=True)

# Compute F1
from sklearn.metrics import f1_score

print(f1_score(target, svm.predict(data)))
print(np.sum(np.abs(target - svm.predict(data))))

utils.plt.close()
utils.plot_decision_function(data, target, svm, svm.support_vectors)
utils.plt.show()
print('support vector', svm.support_vectors)
Example #4
import numpy as np
from sklearn import model_selection
from sklearn.datasets import make_classification
from ml_models import utils
from ml_models.linear_model import LogisticRegression
from ml_models.tree import CARTClassifier
from ml_models.svm import SVC

data, target = make_classification(n_samples=100,
                                   n_features=2,
                                   n_classes=2,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_clusters_per_class=1,
                                   class_sep=0.5)
X_train, X_test, y_train, y_test = model_selection.train_test_split(
    data, target, test_size=0.1)

from ml_models.ensemble import BaggingClassifier

classifier = BaggingClassifier(base_estimator=CARTClassifier(), n_estimators=3)
classifier.fit(X_train, y_train)
# Compute F1
from sklearn.metrics import f1_score

print(f1_score(y_test, classifier.predict(X_test)))
print(np.sum(np.abs(y_test - classifier.predict(X_test))))
utils.plot_decision_function(X_train, y_train, classifier)
utils.plt.show()
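
BaggingClassifier's internals are not shown here; the essence is to fit each base estimator on a bootstrap resample and majority-vote the predictions. A self-contained sketch of that scheme (function and variable names are illustrative, not the ml_models API):

import numpy as np

def bagging_predict(estimators, X, y, X_test):
    # Fit each estimator on a bootstrap resample, then majority-vote
    all_preds = []
    for est in estimators:
        idx = np.random.choice(len(X), len(X), replace=True)
        est.fit(X[idx], y[idx])
        all_preds.append(est.predict(X_test))
    all_preds = np.asarray(all_preds, dtype=int)
    return np.apply_along_axis(
        lambda p: np.bincount(p).argmax(), 0, all_preds)
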
Example #5
from sklearn.datasets import make_blobs
import numpy as np

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]

# Merge classes 0 and 2 into class 0
y = np.where(y == 2, 0, y)
# Merge classes 1 and 3 into class 1
y = np.where(y == 3, 1, y)

from ml_models.cluster import LVQ

lvq = LVQ(class_label=[0, 0, 1, 1])
lvq.fit(X, y)

from ml_models import utils

utils.plot_decision_function(X, y, lvq)
utils.plt.show()
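
LVQ keeps one prototype per entry of class_label and nudges the nearest prototype toward or away from each sample. The standard LVQ1 update, sketched (not necessarily the exact ml_models implementation):

import numpy as np

def lvq1_step(prototypes, proto_labels, x_i, y_i, eta=0.1):
    # Move the nearest prototype toward the sample if the labels agree,
    # away from it otherwise (prototypes is a float array, one row per prototype)
    j = np.argmin(np.linalg.norm(prototypes - x_i, axis=1))
    sign = 1.0 if proto_labels[j] == y_i else -1.0
    prototypes[j] += sign * eta * (x_i - prototypes[j])
    return prototypes
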
Example #6
import numpy as np
import os

os.chdir('../')

from sklearn.datasets import make_classification
from ml_models import utils

data, target = make_classification(n_samples=100,
                                   n_features=2,
                                   n_classes=2,
                                   n_informative=1,
                                   n_redundant=0,
                                   n_repeated=0,
                                   n_clusters_per_class=1,
                                   class_sep=.3)

from ml_models.tree import DecisionTreeClassifier

tree = DecisionTreeClassifier(max_bins=20)
tree.fit(data, target)
tree.prune(alpha=1)

# Compute F1
from sklearn.metrics import f1_score

print(f1_score(target, tree.predict(data)))
print(np.sum(np.abs(target - tree.predict(data))))
utils.plot_decision_function(data, target, tree)
utils.plt.show()
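
prune(alpha=...) performs cost-complexity pruning: a subtree is collapsed into a leaf when that does not worsen the regularized cost C_alpha(T) = C(T) + alpha * |leaves|. The decision rule, sketched (standard CART pruning, not necessarily the exact ml_models code):

def should_collapse(subtree_loss, leaf_loss, n_leaves, alpha):
    # Collapsing replaces n_leaves leaves with a single one,
    # trading training loss against the complexity penalty
    return leaf_loss + alpha * 1 <= subtree_loss + alpha * n_leaves
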
Example #7
import numpy as np
import os
from sklearn import model_selection

os.chdir('../')

from sklearn.datasets import make_classification
from ml_models import utils

data, target = make_classification(n_samples=100, n_features=2, n_classes=2, n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1, class_sep=.3, random_state=44)
X_train, X_test, y_train, y_test = model_selection.train_test_split(data, target, test_size=0.1, random_state=0)

from ml_models.tree import CARTClassifier

tree = CARTClassifier()
tree.fit(X_train, y_train)
tree.prune(5)

# Compute F1
from sklearn.metrics import f1_score

print(f1_score(y_test, tree.predict(X_test)))
print(np.sum(np.abs(y_test - tree.predict(X_test))))
utils.plot_decision_function(X_train, y_train, tree)
utils.plt.show()
Example #8
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]

from ml_models.cluster import AGNES

agnes = AGNES(k=4)
agnes.fit(X)

from ml_models import utils
utils.plot_decision_function(X, y, agnes)
utils.plt.show()
Example #9

from sklearn.datasets import make_blobs
from ml_models import utils

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]

from ml_models.pgm import SemiGaussianNBClassifier

nb = SemiGaussianNBClassifier(link_rulers=[(0, 1)])
nb.fit(X, y)
print(nb.predict_proba(X).shape)
utils.plot_decision_function(X, y, nb)
utils.plt.show()
Example #10
from sklearn.datasets import make_blobs
from ml_models import utils

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]

from ml_models.em import GMMClassifier

gmm = GMMClassifier(n_iter=100)
gmm.fit(X, y)
print(gmm.predict(X))
utils.plot_decision_function(X, y, gmm)
utils.plt.show()
Example #11
"""
Binary classification
"""
from ml_models import utils

from sklearn.datasets import make_classification

data, target = make_classification(n_samples=200, n_features=2, n_classes=2, n_informative=1, n_redundant=0,
                                   n_repeated=0, n_clusters_per_class=1)

# ffm=FFM(batch_size=1, epochs=20, solver='adam',objective='logistic')
# ffm.fit(data,target,show_log=True)
# utils.plot_decision_function(data,target,ffm)
# utils.plt.show()


"""
Multi-class classification
"""
from sklearn.datasets import make_blobs

X, y = make_blobs(n_samples=400, centers=4, cluster_std=0.85, random_state=0)
X = X[:, ::-1]
from ml_models.wrapper_models import *
ffm = FFM(epochs=10, solver='adam', objective='logistic')
ovo = MultiClassWrapper(ffm, mode='ovo')
ovo.fit(X, y)
utils.plot_decision_function(X, y, ovo)
utils.plt.show()
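
MultiClassWrapper with mode='ovo' trains one binary FFM per pair of classes and combines them by voting. A minimal sketch of the voting step (the dict layout and 0/1 predictions are assumptions, not the wrapper's actual internals):

import numpy as np

def ovo_vote(pair_clfs, X, n_classes):
    # pair_clfs: {(a, b): binary clf trained on classes a vs b}
    votes = np.zeros((X.shape[0], n_classes))
    for (a, b), clf in pair_clfs.items():
        pred = clf.predict(X)
        votes[pred == 0, a] += 1
        votes[pred == 1, b] += 1
    return votes.argmax(axis=1)
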
Example #12
from sklearn.datasets import make_classification, make_moons
import numpy as np
from ml_models import utils
from ml_models.svm import SVC

X, y = make_classification(n_samples=500, n_features=2,
                           n_informative=2, n_redundant=0,
                           n_repeated=0, n_classes=2,
                           n_clusters_per_class=1, weights=[0.05, 0.95],
                           class_sep=3, flip_y=0.05, random_state=0)
# X, y = make_moons(noise=0.01)

# Upweight the minority class (label 0) via per-sample weights
weights = np.where(y == 0, 50, 1)
svc_with_sample_weight = SVC(kernel='rbf', gamma=2.0)
svc_with_sample_weight.fit(X, y, sample_weight=weights, show_train_process=True)
utils.plot_decision_function(X=X, y=y, clf=svc_with_sample_weight)
utils.plt.show()