Example #1
from sklearn.mixture import BayesianGaussianMixture as BGM


def get_bgm(n_clusters=10, wcp=1.0e3, tol=None):
    """
    wcp: a higher number puts more mass in the center and will lead to
    more components being active
    """
    covartype = 'full'
    #init_params = 'random' #alternative is kmeans
    init_params = 'kmeans'  #alternative is 'random'
    #wcpt = 'dirichlet_process'       #wcp should be (float, float)
    wcpt = 'dirichlet_distribution'  #wcp should be float
    warm_start = False
    verbose = False
    n_init = 1
    max_iter = 500
    reg_covar = 1e-5  #default is 1e-6
    if tol is None: tol = 1e-3
    gmm = BGM(n_components=n_clusters,
              covariance_type=covartype,
              n_init=n_init,
              weight_concentration_prior_type=wcpt,
              weight_concentration_prior=wcp,
              init_params=init_params,
              max_iter=max_iter,
              verbose=verbose,
              reg_covar=reg_covar,
              tol=tol)
    return gmm
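
# Usage sketch (not from the original snippet): fit the mixture returned by
# get_bgm on synthetic data and inspect the component weights; components that
# the variational prior effectively switches off end up with weights near zero.
import numpy as np

X_demo = np.random.RandomState(0).randn(500, 2)
demo_gmm = get_bgm(n_clusters=10, wcp=1.0e3)
demo_gmm.fit(X_demo)
print(np.round(demo_gmm.weights_, 3))  # near-zero weights mark inactive components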
Example #2
    def _bgm_fit(self, x):
        """Fit a Bayesian Gaussian Mixture to the data given by x.

        Parameters
        ----------
        x : array-like, shape (n_samples, n_attributes)
            The data to be fit.

        Returns
        -------
        model : BayesianGaussianMixture from the sklearn package
            The BayesianGaussianMixture object that has been fit to the data.
        """
        model = BGM(n_components=self.n_components,
                    tol=self.tol,
                    max_iter=self.max_iter,
                    n_init=self.n_init,
                    covariance_type=self.cov_type,
                    weight_concentration_prior_type=self.weight_concentration_prior_type,
                    weight_concentration_prior=self.weight_concentration_prior)
        data = x.astype('float32')
        model.fit(data)

        return model
Example #3
    def fit(self, X, Y):
        # assume classes are numbered 0...K-1
        self.K = len(set(Y))

        self.gaussians = []
        self.p_y = np.zeros(self.K)
        for k in range(self.K):
            print("Fitting gmm", k)
            Xk = X[Y == k]
            self.p_y[k] = len(Xk)
            gmm = BGM(10)  # 10 mixture components for the class-conditional density
            gmm.fit(Xk)
            self.gaussians.append(gmm)
        # normalize p(y)
        self.p_y /= self.p_y.sum()
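
    # Sketch of the matching predict step (not in the original snippet): treat
    # each fitted mixture as the class-conditional density p(x | y=k) and
    # combine it with the class prior p_y[k] via Bayes' rule in log space.
    def predict(self, X):
        log_post = np.zeros((len(X), self.K))
        for k, gmm in enumerate(self.gaussians):
            # score_samples returns log p(x | y=k) for every row of X
            log_post[:, k] = gmm.score_samples(X) + np.log(self.p_y[k])
        return np.argmax(log_post, axis=1)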
Example #4
    def return_clustering(self, time_step, n_clusters=10):

        c_model = BGM(n_clusters, covariance_type='diag').fit(self.x_train)

        self.clustering = c_model

        if self.count % self.trading_window == 0:
            X_principal = pd.DataFrame(self.PCA(time_step, 2)[0])
            X_principal.columns = ['P1', 'P2']
            X_principal['clusters'] = self.clustering.predict(self.x_train)
            sns.pairplot(x_vars=["P1"],
                         y_vars=["P2"],
                         data=X_principal,
                         hue="clusters",
                         height=5)
            plt.show()
        self.count += 1
Example #5
from sklearn.mixture import BayesianGaussianMixture as BGM
from sklearn.mixture import GaussianMixture as GMM


def copy_fit(bgm, method='bgm'):
    n_clusters = bgm.n_components
    covartype = bgm.covariance_type
    n_init = bgm.n_init
    max_iter = bgm.max_iter
    tol = bgm.tol
    verbose = True
    if method == 'bgm':
        wcpt = bgm.weight_concentration_prior_type
        reg_covar = bgm.reg_covar
        init_params = bgm.init_params
        copy = BGM(n_components=n_clusters,
                   covariance_type=covartype,
                   n_init=n_init,
                   weight_concentration_prior_type=wcpt,
                   init_params=init_params,
                   max_iter=max_iter,
                   verbose=verbose,
                   reg_covar=reg_covar,
                   tol=tol)
        copy.weight_concentration_prior_ = bgm.weight_concentration_prior_
        copy.weight_concentration_ = bgm.weight_concentration_
        copy.mean_precision_prior_ = bgm.mean_precision_prior_
        copy.mean_prior_ = bgm.mean_prior_
        copy.mean_precision_ = bgm.mean_precision_
        copy.covariance_prior_ = bgm.covariance_prior_
        copy.degrees_of_freedom_prior_ = bgm.degrees_of_freedom_prior_
        copy.degrees_of_freedom_ = bgm.degrees_of_freedom_
    elif method == 'gmm':
        copy = GMM(n_components=n_clusters,
                   random_state=42,
                   covariance_type=covartype,
                   max_iter=max_iter,
                   n_init=n_init,
                   tol=tol,
                   verbose=verbose)
    copy.means_ = bgm.means_
    copy.covariances_ = bgm.covariances_
    copy.weights_ = bgm.weights_
    copy.precisions_ = bgm.precisions_
    copy.precisions_cholesky_ = bgm.precisions_cholesky_
    copy.converged_ = bgm.converged_
    copy.n_iter_ = bgm.n_iter_
    copy.lower_bound_ = bgm.lower_bound_
    return copy
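
# Usage sketch (not in the original source): after fitting a
# BayesianGaussianMixture, copy_fit mirrors its fitted parameters into a plain
# GaussianMixture so predict/score can be reused on the lighter object. The
# synthetic data below is only for illustration.
import numpy as np

X_demo = np.random.RandomState(1).randn(300, 3)
fitted_bgm = BGM(n_components=5).fit(X_demo)
gmm_copy = copy_fit(fitted_bgm, method='gmm')
print(gmm_copy.predict(X_demo[:5]))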
Example #6
# [[[ 0.35313405 -0.06505064]
#   [-0.06505064  0.27590391]]

#  [[ 0.46521907  0.08082033]
#   [ 0.08082033  0.34744874]]

#  [[ 0.40641319 -0.07908257]
#   [-0.07908257  0.20770555]]]

# print(gm.converged_) # True
# print(gm.n_iter_) # 5

# print(gm.predict(X)) # hard clustering (cluster assignments)
# print(gm.predict_proba(X)) # soft clustering (per-cluster probabilities)

# Gaussian mixture models are generative models, i.e. new instances can be sampled from them
# import numpy as np
# X_new, y_new = gm.sample(6)
# densities = gm.score_samples(X) # estimate the model's density at any location; this method returns the log of the probability density function (PDF) at the instances it receives
# density_threshold = np.percentile(densities, 4)
# anomalies = X[densities < density_threshold]

# gm.bic(X)
# gm.aic(X)

# Bayesian Gaussian mixture model
from sklearn.mixture import BayesianGaussianMixture as BGM
import numpy as np
bgm = BGM(n_components=10, n_init=10)
bgm.fit(X)
print(np.round(bgm.weights_, 2))
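
# Sketch (not in the original source): the commented-out lines above describe
# anomaly detection with score_samples; the same idea applies to the Bayesian
# model, reusing the X fitted above.
densities = bgm.score_samples(X)        # log PDF at each instance
density_threshold = np.percentile(densities, 4)
anomalies = X[densities < density_threshold]
print(anomalies.shape)
print("active components:", np.sum(bgm.weights_ > 1e-2))  # non-negligible weights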
Example #7
Tprocess0 = time.time()
print('\n', '## DATA PREPARATION RUNTIME:', Tprocess0 - Tstart)  #Timer

## MAIN ##
#load CAE model
cae_model = load_model(cae_mfn)
#Retrieve the encoder layer
Embedding_layer = K.function([cae_model.layers[0].input],
                             [cae_model.layers[14].output])
input4bgmm = Embedding_layer([X_train[:]])
input4bgmm = np.array(input4bgmm)
input4bgmm = input4bgmm[0]
print(input4bgmm.shape)

#clustering
grouper = BGM(n_components=nCluster)
grouper.fit(input4bgmm)
if tosavemodel:
    #save the fitted model to disk
    pickle.dump(grouper, open(savename, 'wb'))

Tprocess1 = time.time()
print('\n', '## CLUSTERING RUNTIME:', Tprocess1 - Tprocess0)  #Timer end

#brief examination
y_pred = grouper.predict(input4bgmm)
y_max = np.max(y_pred)
y_proba = grouper.predict_proba(input4bgmm)  #probability of belonging to each group

#group = [(number of group members): images, group label, probability for each group]
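
# Sketch (not in the original source): one way to build the "group" structure
# described above, collecting each cluster's member images, its label, and the
# per-group probabilities; variable names reuse those defined earlier.
groups = []
for label in range(y_max + 1):
    members = np.where(y_pred == label)[0]
    groups.append((X_train[members], label, y_proba[members]))
    print('group', label, ':', len(members), 'members')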