def get_bgm(n_clusters=10, wcp=1.0e3, tol=None):
    """Build an (unfitted) BayesianGaussianMixture with project defaults.

    Parameters
    ----------
    n_clusters : int
        Upper bound on the number of mixture components.
    wcp : float
        Weight concentration prior. A higher number puts more mass in the
        center and will lead to more components being active.
    tol : float or None
        Convergence tolerance; defaults to 1e-3 when None.

    Returns
    -------
    BGM
        The configured, unfitted mixture model.
    """
    covartype = 'full'
    # init_params = 'random'  # alternative is kmeans
    init_params = 'kmeans'
    # wcpt = 'dirichlet_process'  # then wcp should be (float, float)
    wcpt = 'dirichlet_distribution'  # wcp should be a single float
    verbose = False
    n_init = 1
    max_iter = 500
    reg_covar = 1e-5  # default is 1e-6
    # `is None` (not truthiness) so an explicit tol=0.0 is respected.
    if tol is None:
        tol = 1e-3
    gmm = BGM(n_components=n_clusters,
              covariance_type=covartype,
              n_init=n_init,
              weight_concentration_prior_type=wcpt,
              # BUG FIX: `wcp` was accepted and documented but never passed,
              # so the prior silently fell back to sklearn's default.
              weight_concentration_prior=wcp,
              init_params=init_params,
              max_iter=max_iter,
              verbose=verbose,
              reg_covar=reg_covar,
              tol=tol)
    return gmm
def _bgm_fit(self, x):
    """Fit a Bayesian Gaussian Mixture to the data given by x.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_attributes)
        The data to be fit.

    Returns
    -------
    model : BayesianGaussianMixture from the sklearn package
        The BayesianGaussianMixture object that has been fit to the data.
    """
    # Mirror the mixture hyper-parameters stored on this instance.
    mixture = BGM(
        n_components=self.n_components,
        tol=self.tol,
        max_iter=self.max_iter,
        n_init=self.n_init,
        covariance_type=self.cov_type,
        weight_concentration_prior_type=self.weight_concentration_prior_type,
        weight_concentration_prior=self.weight_concentration_prior,
    )
    # Cast once up front; float32 halves the input footprint.
    mixture.fit(x.astype('float32'))
    return mixture
def fit(self, X, Y):
    """Fit one 10-component BGM per class.

    Classes are assumed to be numbered 0...K-1. Stores the fitted
    mixtures in self.gaussians and the class priors in self.p_y.
    """
    self.K = len(set(Y))
    self.gaussians = []
    # Starts as raw class counts; normalized to p(y) at the end.
    self.p_y = np.zeros(self.K)
    for label in range(self.K):
        print("Fitting gmm", label)
        subset = X[Y == label]
        self.p_y[label] = len(subset)
        mixture = BGM(10)
        mixture.fit(subset)
        self.gaussians.append(mixture)
    # normalize p(y)
    self.p_y /= self.p_y.sum()
def return_clustering(self, time_step, n_clusters=10):
    """Cluster self.x_train with a diagonal-covariance BGM.

    Stores the fitted model on self.clustering and, once per trading
    window (when self.count is a multiple of self.trading_window),
    plots the cluster assignments over the first two PCA components.
    Always increments self.count.
    """
    model = BGM(n_clusters, covariance_type='diag').fit(self.x_train)
    self.clustering = model
    # Only visualize at the start of each trading window.
    if self.count % self.trading_window == 0:
        frame = pd.DataFrame(self.PCA(time_step, 2)[0])
        frame.columns = ['P1', 'P2']
        frame['clusters'] = model.predict(self.x_train)
        # NOTE(review): seaborn's `size=` kwarg is deprecated in favor of
        # `height=` — confirm against the pinned seaborn version.
        sns.pairplot(x_vars=["P1"], y_vars=["P2"], data=frame,
                     hue="clusters", size=5)
        plt.show()
    self.count += 1
def copy_fit(bgm, method='bgm'):
    """Clone a fitted BayesianGaussianMixture without re-fitting.

    Builds a new estimator with the same hyper-parameters and copies the
    fitted attributes across so the clone can predict immediately.

    Parameters
    ----------
    bgm : BayesianGaussianMixture (fitted)
        Source model to copy.
    method : {'bgm', 'gmm'}
        'bgm' clones into another BayesianGaussianMixture; 'gmm'
        transplants the fitted Gaussian parameters into a plain
        GaussianMixture.

    Returns
    -------
    The cloned estimator, ready for predict/predict_proba.

    Raises
    ------
    ValueError
        If method is neither 'bgm' nor 'gmm'.
    """
    n_clusters = bgm.n_components
    covartype = bgm.covariance_type
    n_init = bgm.n_init
    max_iter = bgm.max_iter
    tol = bgm.tol
    verbose = True
    if method == 'bgm':
        copy = BGM(n_components=n_clusters,
                   covariance_type=covartype,
                   n_init=n_init,
                   weight_concentration_prior_type=bgm.weight_concentration_prior_type,
                   init_params=bgm.init_params,
                   max_iter=max_iter,
                   verbose=verbose,
                   reg_covar=bgm.reg_covar,
                   tol=tol)
        # Variational (Dirichlet) posterior state, only present on a BGM.
        copy.weight_concentration_prior_ = bgm.weight_concentration_prior_
        copy.weight_concentration_ = bgm.weight_concentration_
        # BUG FIX: previously copied the unfitted init param
        # `mean_precision_prior`; the fitted attribute carries a trailing
        # underscore, consistent with every other line in this block.
        copy.mean_precision_prior_ = bgm.mean_precision_prior_
        copy.mean_prior_ = bgm.mean_prior_
        copy.mean_precision_ = bgm.mean_precision_
        copy.covariance_prior_ = bgm.covariance_prior_
        copy.degrees_of_freedom_prior_ = bgm.degrees_of_freedom_prior_
        copy.degrees_of_freedom_ = bgm.degrees_of_freedom_
    elif method == 'gmm':
        copy = GMM(n_components=n_clusters, random_state=42,
                   covariance_type=covartype, max_iter=max_iter,
                   n_init=n_init, tol=tol, verbose=verbose)
    else:
        # Previously an unknown method fell through to an opaque NameError.
        raise ValueError("method must be 'bgm' or 'gmm', got %r" % (method,))
    # Gaussian parameters and fit metadata shared by both estimator types.
    copy.means_ = bgm.means_
    copy.covariances_ = bgm.covariances_
    copy.weights_ = bgm.weights_
    copy.precisions_ = bgm.precisions_
    copy.precisions_cholesky_ = bgm.precisions_cholesky_
    copy.converged_ = bgm.converged_
    copy.n_iter_ = bgm.n_iter_
    copy.lower_bound_ = bgm.lower_bound_
    return copy
# [[[ 0.35313405 -0.06505064] # [-0.06505064 0.27590391]] # [[ 0.46521907 0.08082033] # [ 0.08082033 0.34744874]] # [[ 0.40641319 -0.07908257] # [-0.07908257 0.20770555]]] # print(gm.converged_) # True # print(gm.n_iter_) # 5 # print(gm.predict(X)) # 硬分群 # print(gm.predict_proba(X)) # 軟分群 # 高斯混合模型是生成模型, 也就是可以從裡面抽樣新實例 # import numpy as np # X_new, y_new = gm.sample(6) # densities = gm.score_samples(X) # 估算模型在任何位置的密度, 此方法可以估計它收到的實例位置的機率密度函數(PDF) # density_threshold = np.percentile(densities, 4) # anomalies = X[densities < density_threshold] # gm.bic(X) # gm.aic(X) # 貝氏高斯混合模型 from sklearn.mixture import BayesianGaussianMixture as BGM import numpy as np bgm = BGM(n_components=10, n_init=10) bgm.fit(X) print(np.round(bgm.weights_, 2))
Tprocess0 = time.time()
print('\n', '## DATE PREPARATION RUNTIME:', Tprocess0 - Tstart)  # Timer

## MAIN ##
# Load the pre-trained convolutional autoencoder from disk.
cae_model = load_model(cae_mfn)
# Retrieve the encoder layer: a Keras backend function mapping the CAE
# input to the output of layer 14 (presumably the bottleneck embedding
# — TODO confirm against the CAE architecture).
Embedding_layer = K.function([cae_model.layers[0].input],
                             [cae_model.layers[14].output])
input4bgmm = Embedding_layer([X_train[:]])
# K.function returns a list of outputs; unwrap the first (only) array.
input4bgmm = np.array(input4bgmm)
input4bgmm = input4bgmm[0]
print(input4bgmm.shape)

# Clustering: fit a Bayesian Gaussian mixture on the embeddings.
grouper = BGM(n_components=nCluster)
grouper.fit(input4bgmm)
if tosavemodel:
    # Persist the fitted model so it can be restored later.
    # NOTE(review): the file handle from open() is never closed — a
    # `with open(savename, 'wb') as f:` block would be safer.
    pickle.dump(grouper, open(savename, 'wb'))
Tprocess1 = time.time()
print('\n', '## CLUSTERING RUNTIME:', Tprocess1 - Tprocess0)  # Timer end

# Brief examination of the clustering result.
y_pred = grouper.predict(input4bgmm)  # hard cluster label per sample
y_max = np.max(y_pred)  # highest cluster index actually assigned
y_proba = grouper.predict_proba(
    input4bgmm)  # probability of being a certain group
# group = [(number of group members): images, group label, probability for each group]