def test_wrong_row_name_number():
    """Check that `heatmap` rejects a `row_names` list of the wrong length.

    A 10x5 matrix is passed together with only three row names; the call
    must raise an ``AssertionError`` whose message reports both counts.
    """
    with pytest.raises(AssertionError) as excinfo:
        heatmap(np.random.random((10, 5)), row_names=['a', 'b', 'c'])

    # The message is inspected after the ``with`` block: anything placed
    # after the raising call inside the block would never be executed.
    # ``str(exc)`` is used because exceptions have no ``.message`` attribute
    # on Python 3.
    message = str(excinfo.value)

    # NOTE(review): the previous expected text started with
    # 'len(column_names)' although this test passes `row_names`. Only the
    # reported counts are pinned here -- confirm the exact wording against
    # `heatmap` before tightening this to a full-string comparison.
    assert '(got 3)' in message
    assert '(expect 10)' in message
def plot_corr(X, y, output_path='./output/plot', plot_it=False):
    """Draw and save a Pearson feature-correlation heatmap.

    Input:
        X, y: preprocessed dataset. ``X`` must contain the ``name`` and
            ``date`` columns, which are dropped before the correlation is
            computed; ``y`` is appended as a column named ``y``.
            NOTE(review): the original note asks for the data to be
            normalised before the correlation is computed; this function
            does not normalise anything itself -- confirm the caller does.
        output_path: directory the result files are written to; it is
            created when it does not exist yet.
        plot_it: whether to display the figure after it has been saved.

    Output (files written under ``output_path``; nothing is returned):
        corr_matrix.png: the heatmap.
        col_dict.json: maps the integer index shown on the plot axes to the
            corresponding feature name.
    """
    features = X.drop(['name', 'date'], axis=1)
    features['y'] = y

    # Pearson correlation coefficient matrix
    cm = features.corr(method='pearson')

    # The axes are labelled with integer positions instead of the feature
    # names; col_dict is the legend that maps them back.
    col_dict = dict(enumerate(features.columns))
    heatmap(np.array(cm),
            row_names=list(col_dict),
            column_names=list(col_dict),
            figsize=(40, 40))
    plt.title('Features Correlation Heatmap', fontsize=20)

    # Output the plot and col_dict. exist_ok avoids the race between a
    # separate "does it exist?" check and the directory creation.
    os.makedirs(output_path, exist_ok=True)
    plt.savefig(os.path.join(output_path, 'corr_matrix.png'), dpi=300)
    with open(os.path.join(output_path, 'col_dict.json'), 'w',
              encoding='utf-8') as f:
        json.dump(col_dict, f)

    if plot_it:
        plt.show()
def plot_correlations(self, df: pd.DataFrame, cols=None, postfix='') -> None:
    """Plot the correlation matrix of selected columns and save it as a PNG.

    Args:
        df: data frame with a ``date`` column, which is dropped before the
            correlation is computed.
        cols: names of the columns to correlate. When omitted or empty,
            every column left after dropping ``date`` is used.
        postfix: text inserted into the output file name
            ``correlation_matrix{postfix}.png``.
    """
    no_date_df = df.drop(columns=['date'])
    if not cols:
        cols = list(no_date_df.columns)

    # Columns are transposed into rows because np.corrcoef treats each row
    # as one variable.
    corr = np.corrcoef(no_date_df[cols].values.T)
    heatmap(corr, row_names=cols, column_names=cols)

    # Save before showing: once plt.show() returns, the displayed figure may
    # already have been released, and a later savefig would write an empty
    # canvas.
    plt.savefig(f'correlation_matrix{postfix}.png')
    plt.show()
# FutureWarning output is silenced before the remaining imports run.
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mlxtend.plotting import heatmap, scatterplotmatrix

# Load dataset: the CSV header row is replaced with short column names.
column_names = ['PREG', 'GLU', 'BP', 'SKIN', 'INSU', 'BMI', 'DPF', 'AGE', 'OUT']
diabetes = pd.read_csv('diabetes.csv', header=0)
diabetes.columns = column_names

# Every column except the trailing 'OUT' column is used as a feature.
features = column_names[:-1]
X = diabetes[features].values
y = diabetes['OUT'].T

# EDA: correlation heatmap and scatterplot matrix over all columns.
all_values = diabetes[diabetes.columns].values
cm = np.corrcoef(all_values.T)
hm = heatmap(cm, row_names=diabetes.columns, column_names=diabetes.columns)
scatterplotmatrix(all_values,
                  figsize=(10, 8),
                  names=diabetes.columns,
                  alpha=0.4)
plt.show()
# In[9]: from mlxtend.plotting import heatmap # In[10]: import numpy as np # In[11]: cm = np.corrcoef(df[cols].values.T) # In[12]: hm = heatmap(cm, row_names=cols, column_names=cols) plt.show() # In[13]: class LinearRegressionGD(object): def __init__(self, eta=0.001, n_iter=20): self.eta = eta self.n_iter = n_iter def fit(self, X, y): self.w_ = np.zeros(1 + X.shape[1]) self.cost_ = [] for i in range(self.n_iter):
def test_defaults():
    """Smoke test: `heatmap` accepts a random 10x5 matrix with default options."""
    matrix = np.random.random((10, 5))
    heatmap(matrix)
y_val=y_test, epochs=args_dict['epochs']) print("Fitting time: {:.3f} seconds".format(time() - start_)) ans = model.predict(x_test) y_pred = np.argmax(ans, axis=1) print(y_pred) print(collections.Counter(np.equal(y_pred, y_test1))) #prediction counter print(collections.Counter(np.round(y_pred))) x_ = (np.equal(np.round(y_test1), np.round(y_pred))) print(collections.Counter(x_)) #confusion matrix heat map x = (confusion_matrix(np.round(y_test1), np.round(y_pred))) hm = heatmap(x, column_names=np.sort(np.unique(np.round(y_pred))), row_names=np.sort(np.unique(np.round(y_pred))), figsize=(30, 30)) plt.show() #Jaccard Score #j_score = jaccard_score(np.round(y_test1),np.round(y_pred), average='micro') #print('Jaccard Score: ',j_score) #Accuracy Score accuracy = accuracy_score(np.round(y_test1), np.round(y_pred)) print('Accuracy Score: ', accuracy) #classification report print('classification_report') print(classification_report(np.round(y_test1), np.round(y_pred))) #Distinct classes
def heat_map(self, data, cols):
    """Display a heatmap of the correlation coefficients between ``data[cols]`` columns.

    ``cols`` selects the columns of ``data`` and also labels both axes.
    """
    selected_values = data[cols].values
    # Transposed so that each selected column is one row passed to np.corrcoef.
    correlations = np.corrcoef(selected_values.T)
    heatmap(correlations, row_names=cols, column_names=cols)
    plt.show()