def get_palette_colour(label):
    """Return the palette colour assigned to a feature-category label.

    Seven colours are requested from ``CONFIG.base_palette`` and shared
    between related labels: every CT texture family (and the generic
    'Texture' / 'CT Texture' labels) uses one colour, every PET texture
    family another, and 'First Order' is an alias of 'CT First Order'.

    Args:
        label: Category name; must be one of the keys of the mapping below.

    Returns:
        The palette entry associated with ``label``.

    Raises:
        KeyError: If ``label`` is not a known category.
    """
    palette = CONFIG.base_palette(n=7)
    mapping = {
        'Shape': palette[0],
        'Clinical': palette[1],
        'CT First Order': palette[2],
        'First Order': palette[2],
        'CT GLCM': palette[4],
        'CT GLDM': palette[4],
        'CT GLRLM': palette[4],
        'CT GLSZM': palette[4],
        'CT NGTDM': palette[4],
        'PET GLCM': palette[5],
        'PET GLDM': palette[5],
        'PET GLRLM': palette[5],
        'PET GLSZM': palette[5],
        # Misspelled variant of 'PET GLSZM'; kept so that callers that
        # already pass this spelling keep working.
        'PET GLZSM': palette[5],
        'PET NGTDM': palette[5],
        'PET First Order': palette[3],
        'CT Texture': palette[4],
        'Texture': palette[4],
        'PET Texture': palette[5],
        'PET Parameter': palette[6],
    }
    return mapping[label]
def _plot_tve(df_avg_co_scores, co_grid):
    """Plot the 'tvr' score of each parameter configuration and save it.

    The values of the 'tvr' column are drawn as unconnected blue markers
    against the frame's index, and the figure is written to
    '../biclustering/tve_by_param_config.pdf'.

    Args:
        df_avg_co_scores: DataFrame with a 'tvr' column.
        co_grid: Sized collection of parameter configurations; only its
            length is used, to place and number the x ticks.
    """
    tvr = df_avg_co_scores.loc[:, 'tvr']
    plt.plot(
        tvr.index, tvr.values, color='blue', marker='o', linestyle=''
    )

    # Six evenly spaced ticks across the observed range. The labels are
    # formatted from the tick positions themselves so that each label
    # matches its tick and the two sequences have the same length.
    y_coords = np.linspace(np.min(tvr), np.max(tvr), 6)
    y_ticks = [f'{tick:.02f}' for tick in y_coords]
    plt.yticks(y_coords, y_ticks)

    # One tick per configuration, numbered from 1.
    n_configs = len(co_grid)
    plt.xticks(np.arange(n_configs), np.arange(1, n_configs + 1, dtype=int))

    plt.ylabel('Transposed Virtual Error')
    plt.xlabel('Number of Biclusters')
    plt.savefig(
        '../biclustering/tve_by_param_config.pdf',
        bbox_inches='tight',
        dpi=CONFIG.DPI,
    )
def _plot_row_bics(orig_co_model, y_orig):
    """Plot the outcome composition of each row cluster as horizontal bars.

    The table returned by ``_row_bics`` is split on its 'results_id' column
    into the two outcome groups; the 'comb_results' values of each group
    become one bar series. The figure is written to
    '../biclustering/row_bics.pdf'.

    Args:
        orig_co_model: Passed through to ``_row_bics``.
        y_orig: Passed through to ``_row_bics``.
    """
    df_row_clusters = _row_bics(orig_co_model, y_orig)
    outcome = df_row_clusters.loc[:, 'results_id']

    data = {}
    for group in ('Disease-Free Survival', 'Other Event'):
        rows = df_row_clusters.loc[outcome == group, :]
        # ravel keeps a one-cluster result one-dimensional.
        data[group] = np.ravel(rows.loc[:, 'comb_results'].values)

    df = pd.DataFrame(data)
    # Number the clusters from 1, however many there are.
    df.index = np.arange(1, len(df) + 1)

    df.plot(kind='barh', figsize=(15, 9.27), colormap='viridis')
    plt.ylabel('Row Cluster Indicator')
    plt.xlabel('Treatment outcome (%)')
    plt.savefig(
        '../biclustering/row_bics.pdf',
        bbox_inches='tight',
        dpi=CONFIG.DPI,
    )
def gen_piechart():
    """Draw a pie chart of the absolute size of each feature category.

    Wedge colours and legend patches are looked up per category with
    ``get_palette_colour``. The chart is saved to 'absolute_piechart.pdf'
    with a transparent background and then displayed.
    """
    path_to_figure = 'absolute_piechart.pdf'
    show = True

    labels = [
        'Shape',
        'CT First Order',
        'CT Texture',
        'PET First Order',
        'PET Texture',
        'Clinical',
    ]
    # Absolute sizes.
    sizes = [2, 6, 11, 1, 2, 4]
    # Alternative: sizes relative to the feature category size
    # (Shape = 8, CT FS = 12, CT TEXT = 56, PET FS = 7, PET TEXT = 27,
    # Clinical = 42):
    # sizes = [2 / 8, 6 / 12, 11 / 56, 1 / 7, 2 / 27, 4 / 42]

    colors = [get_palette_colour(key) for key in labels]
    handles = [
        mpatches.Patch(color=color, label=key)
        for key, color in zip(labels, colors)
    ]

    plt.pie(
        sizes,
        colors=colors,
        autopct='%1.1f%%',
        shadow=False,
        startangle=0,
        labeldistance=0.85,
        textprops={'fontsize': 22},
        pctdistance=0.8,
    )
    plt.legend(
        handles=handles,
        title='Feature Categories:',
        title_fontsize=18,
        fancybox=True,
        shadow=True,
        ncol=1,
        labelspacing=0.25,
    )
    # Equal aspect ratio so the pie is drawn as a circle.
    plt.axis('equal')
    plt.savefig(
        path_to_figure, bbox_inches='tight', transparent=True, dpi=CONFIG.DPI
    )
    if show:
        plt.show()
def plot_radiomics_feat_scatter():
    """Scatter the Z-scored shape, PET and CT features per patient.

    Feature columns are selected from the predictor CSV by matching
    'shape', 'PET' and 'CT' in the column names, standardised per group,
    and drawn in one figure per group with one colour per feature column.
    Each figure is saved as '../expl_analysis/rfeat_vals_<group>.pdf'.
    """
    show = False
    path_to_figure = '../expl_analysis/'
    path_to_target = './../../data_source/to_analysis/original_images/dfs_original_images.csv'
    path_to_predictors = './../../data_source/to_analysis/original_images/all_features_original_images.csv'

    y = np.squeeze(pd.read_csv(path_to_target, index_col=0).values)
    X = pd.read_csv(path_to_predictors, index_col=0)
    n_patients = np.size(y)

    # fit_transform refits the scaler on every call, so one instance can
    # be reused for the three groups.
    scaler = StandardScaler()
    groups = [
        ('Shape Feature', scaler.fit_transform(X.filter(regex='shape'))),
        ('PET Feature', scaler.fit_transform(X.filter(regex='PET'))),
        ('CT Feature', scaler.fit_transform(X.filter(regex='CT'))),
    ]

    patient_ids = np.arange(1, n_patients + 1, dtype=int)
    for ylabel, dset in groups:
        palette = CONFIG.base_palette(n=dset.shape[1])
        fig = plt.figure()
        for col_num, feature_col in enumerate(dset.T):
            plt.scatter(patient_ids, feature_col, color=palette[col_num])

        plt.xlabel('Patient ID')
        plt.ylabel(f'Z-scored {ylabel} Values')

        x_coords = np.linspace(1, n_patients, 6, dtype=int)
        plt.xticks(x_coords, x_coords)

        y_coords = np.linspace(np.min(dset), np.max(dset), 6)
        # One decimal once the integer part has two or more digits, two
        # decimals otherwise, so the labels keep a similar width.
        y_ticks = [
            f'{tick:.01f}' if abs(tick) >= 10 else f'{tick:.02f}'
            for tick in y_coords
        ]
        plt.yticks(y_coords, y_ticks)

        _path_to_figure = f'{path_to_figure}rfeat_vals_{ylabel.split()[0]}.pdf'
        plt.savefig(_path_to_figure, bbox_inches='tight', dpi=CONFIG.DPI)
        if show:
            plt.show()
        # Release the figure; otherwise every group leaves one open.
        plt.close(fig)
def get_palette_colour(label):
    """Look up the colour of a feature category in a seven-colour palette.

    Categories are assigned consecutive palette entries in the order they
    are listed below.

    Args:
        label: One of the category names listed in ``categories``.

    Returns:
        The palette entry for ``label``.

    Raises:
        KeyError: If ``label`` is not a listed category.
    """
    categories = (
        'Shape',
        'Clinical',
        'CT First Order',
        'PET First Order',
        'CT Texture',
        'PET Texture',
        'PET parameter',
    )
    colours = CONFIG.base_palette(n=7)
    by_category = {
        name: colours[position] for position, name in enumerate(categories)
    }
    return by_category[label]
def get_palette_colour(label):
    """Look up the colour of a feature category in a nine-colour palette.

    Categories are assigned consecutive palette entries in the order they
    are listed below.

    Args:
        label: One of the category names listed in ``categories``.

    Returns:
        The palette entry for ``label``.

    Raises:
        KeyError: If ``label`` is not a listed category.
    """
    categories = (
        'Shape',
        'Clinical',
        'First Order',
        'GLCM',
        'GLRLM',
        'GLSZM',
        'GLDM',
        'NGTDM',
        'PET parameter',
    )
    colours = CONFIG.base_palette(n=9)
    by_category = {
        name: colours[position] for position, name in enumerate(categories)
    }
    return by_category[label]
def _plot_gtv_reduction(red_volume, show=True):
    """Stem plot of the fraction of GTV removed by discarding damaged slices.

    One stem is drawn per entry of ``red_volume``; entries are numbered
    from 1 along the x axis. The figure is saved to
    '../damaged_slices/frac_gtv_removed.pdf' with a transparent background.

    Args:
        red_volume: Sequence of volume reductions, one per patient. The
            y axis is ticked from 0 to 50, so values are presumably
            percentages (the y label says '%').
        show: Display the figure after saving it.
    """
    path_to_figure = '../damaged_slices/frac_gtv_removed.pdf'
    palette = CONFIG.base_palette(n=4)

    n_patients = len(red_volume)
    # 1-based positions so that the stems line up with the patient-ID
    # ticks placed between 1 and n_patients below.
    patient_ids = np.arange(1, n_patients + 1)

    plt.figure()
    # x and y are passed by keyword: recent seaborn releases accept only
    # `data` positionally.
    sns.scatterplot(x=patient_ids, y=red_volume)
    markerline, stemlines, baseline = plt.stem(
        patient_ids, red_volume, linefmt='-', markerfmt='o', bottom=0.0
    )
    plt.setp(stemlines, color=palette[3], linewidth=2)
    plt.setp(baseline, color=palette[3], linewidth=0.5)
    plt.setp(markerline, 'markerfacecolor', palette[-3])

    plt.xlabel('Patient ID')
    plt.ylabel('Reduction in Tumor Volume (%)')

    x_coords = np.linspace(1, n_patients, 5)
    x_ticks = [f'{int(tick)}' for tick in x_coords]
    plt.xticks(x_coords, x_ticks)

    y_coords = np.linspace(0.00, 50.0, 6)
    # One decimal for two-digit ticks, two decimals for single-digit ones.
    y_ticks = [
        f'{tick:.01f}' if tick >= 10 else f'{tick:.02f}' for tick in y_coords
    ]
    plt.yticks(y_coords, y_ticks)

    plt.savefig(
        path_to_figure,
        bbox_inches='tight',
        transparent=True,
        dpi=CONFIG.DPI,
    )
    if show:
        plt.show()
def gen_histogram():
    """Save a decorative, axis-free histogram of standard-normal samples.

    10000 samples are drawn, binned into 200 bins, coloured with the
    single-colour base palette and written to './histogram.pdf' with a
    transparent background.
    """
    show = False
    path_to_figure = './histogram.pdf'

    colour = CONFIG.base_palette(n=1)
    samples = np.random.normal(size=10000)

    plt.hist(samples, bins=200, color=colour)
    plt.axis('off')
    plt.savefig(
        path_to_figure,
        bbox_inches='tight',
        transparent=True,
        dpi=CONFIG.DPI,
    )
    if show:
        plt.show()
def gen_doughnut():
    """Save a doughnut chart made of five equally sized wedges.

    A pie chart is drawn first and a white disc of radius 0.7 is laid on
    top of it to form the hole. The result is written to './doughnut.pdf'
    with a transparent background.
    """
    show = False
    path_to_figure = './doughnut.pdf'

    n_wedges = 5
    fractions = np.full(n_wedges, 1 / n_wedges)
    wedge_colours = CONFIG.base_palette(n=len(fractions))

    hole = plt.Circle((0, 0), 0.7, color='white')
    plt.pie(fractions, colors=wedge_colours)
    # The disc is added after the wedges so that it is drawn over them.
    plt.gcf().gca().add_artist(hole)

    plt.savefig(
        path_to_figure,
        bbox_inches='tight',
        transparent=True,
        dpi=CONFIG.DPI,
    )
    if show:
        plt.show()
def _plot_column_bics(orig_co_model, X_orig):
    """Bar chart of the feature-category composition of each column cluster.

    The table returned by ``_column_bics`` is plotted as grouped bars, one
    colour per table column, and written to
    '../biclustering/column_bics.pdf'.

    Args:
        orig_co_model: Passed through to ``_column_bics``.
        X_orig: Passed through to ``_column_bics``.
    """
    df_column_clusters = _column_bics(orig_co_model, X_orig)
    n_categories = len(df_column_clusters.columns)

    ax = df_column_clusters.plot(
        kind='bar',
        width=0.9,
        figsize=(15, 9.27),
        colormap=ListedColormap(CONFIG.base_palette(n=n_categories)),
    )
    plt.xlabel('Column Cluster Indicator')
    plt.ylabel('Feature Category (%)')

    # Manual layout tweak: the bar at patch index 26 is moved 0.1 to the
    # left.
    stray_bar = 26
    if len(ax.patches) > stray_bar:
        bar = ax.patches[stray_bar]
        bar.set_x(bar.get_x() - 0.1)

    plt.legend(
        loc='center right',
        bbox_to_anchor=(0.25, 0.75),
        ncol=1,
        fancybox=True,
        shadow=True,
    )

    n = 3
    plt.xticks(np.arange(n), np.arange(1, n + 1), rotation=0)
    # Tick positions are fractions (0-0.8); labels are built from 0-80.
    plt.yticks(np.linspace(0.0, 0.8, 6), _ticks(np.linspace(0.0, 80, 6)))
    plt.savefig(
        '../biclustering/column_bics.pdf',
        bbox_inches='tight',
        dpi=CONFIG.DPI,
    )
def plot_img_value_stats(path_to_fig, path_to_images, path_to_masks=None,
                         show=False, include_legend=False):
    """Scatter descriptive statistics of image values and save the figure.

    Images are loaded with ``load_images`` and summarised by
    ``calc_image_vale_stats``; every row of the resulting table is drawn
    as one scatter series against the table's column labels.

    Args:
        path_to_fig: Destination of the saved figure.
        path_to_images: Passed to ``load_images``.
        path_to_masks: Passed to ``load_images``.
        show: Display the figure after saving it.
        include_legend: Add a legend above the axes.
    """
    stat_names = ['Maximum', 'Mean', 'Median', 'Minimum']

    images = load_images(path_to_images, path_to_masks)
    df_stats = calc_image_vale_stats(images, CONFIG.patient_axis_ticks())
    colours = CONFIG.base_palette(n=4)

    # NOTE: linspace should be max value!
    for position, row_label in enumerate(df_stats.index):
        # Plot image stats.
        values = np.squeeze(df_stats.loc[row_label, :].values)
        plt.scatter(
            x=np.squeeze(df_stats.columns.values),
            y=values,
            label=stat_names[position],
            color=colours[position],
            alpha=CONFIG.ALPHA,
        )

    if include_legend:
        plt.legend(
            loc='upper center',
            bbox_to_anchor=(0.5, 1.1),
            ncol=4,
            fancybox=True,
            shadow=True,
        )

    # Six integer ticks from zero up to the largest statistic.
    y_positions = np.linspace(0, max(df_stats.max()), 6, dtype=int)
    plt.yticks(y_positions, _ticks(y_positions))

    x_positions = np.linspace(1, 198, 6, dtype=int)
    plt.xticks(x_positions, x_positions)

    plt.savefig(path_to_fig, bbox_inches='tight', dpi=400)
    if show:
        plt.show()
def _plot_train_valid(results, path_to_figure=None, show=False):
    """Plot training and validation scores with one-standard-deviation bands.

    Args:
        results: Mapping with the entries 'train_score', 'test_score',
            'train_score_variance' and 'test_score_variance'; the entries
            are combined element-wise, so they are expected to be arrays
            of equal length.
        path_to_figure: Where to save the figure; nothing is saved when
            this is None.
        show: Display the figure.
    """
    valid_mean = results['test_score']
    train_mean = results['train_score']
    valid_spread = np.sqrt(results['test_score_variance'])
    train_spread = np.sqrt(results['train_score_variance'])

    colours = CONFIG.base_palette(n=4)
    train_colour, valid_colour = colours[1], colours[2]

    # Repeated experiments, numbered from 1.
    experiments = np.arange(1, np.size(valid_mean) + 1)

    plt.plot(
        experiments,
        train_mean,
        color=train_colour,
        marker='o',
        markersize=5,
        label='Training score',
    )
    plt.plot(
        experiments,
        valid_mean,
        color=valid_colour,
        linestyle='--',
        marker='s',
        label='Validation score',
    )
    plt.fill_between(
        experiments,
        train_mean - train_spread,
        train_mean + train_spread,
        alpha=0.25,
        color=train_colour,
    )
    plt.fill_between(
        experiments,
        valid_mean - valid_spread,
        valid_mean + valid_spread,
        alpha=0.25,
        color=valid_colour,
    )
    plt.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, 1.15),
        ncol=2,
        fancybox=True,
        shadow=True,
    )

    y_positions = np.linspace(0.0, 1.0, 6)
    plt.yticks(y_positions, _ticks(y_positions))
    plt.ylim([0.4, 1.01])

    x_positions = np.linspace(np.min(experiments), np.max(experiments), 6)
    plt.xticks(x_positions, _ticks(x_positions))
    # Small margin so that the first and last markers are not clipped.
    plt.xlim([x_positions[0] - 0.1, x_positions[-1] + 0.1])

    if path_to_figure is not None:
        plt.savefig(path_to_figure, bbox_inches='tight', dpi=CONFIG.DPI)
    if show:
        plt.show()
def learning_curve():
    """Plot the stored learning curve and save it as a PDF.

    Four arrays (mean and standard deviation of the training and
    validation scores) are loaded from disk, averaged over their first
    axis and drawn against 37 training-set fractions between 0.4 and 1.0.
    The figure is written to
    './../../figures/best_model/learning_ms_xgb.pdf'.
    """
    path_to_figure = './../../figures/best_model/learning_ms_xgb.pdf'
    train_sizes = np.linspace(0.4, 1.0, 37)

    sources = {
        'avg_test': './../../data_source/results/performance_curves/learning_curve/learning_curve_avg_test.npy',
        'avg_train': './../../data_source/results/performance_curves/learning_curve/learning_curve_avg_train.npy',
        'std_test': './../../data_source/results/performance_curves/learning_curve/learning_curve_std_test.npy',
        'std_train': './../../data_source/results/performance_curves/learning_curve/learning_curve_std_train.npy',
    }
    # Collapse the first axis of every stored array to a single curve.
    curves = {
        name: np.squeeze(np.mean(np.load(path), axis=0))
        for name, path in sources.items()
    }

    train_lower = curves['avg_train'] - curves['std_train']
    train_upper = curves['avg_train'] + curves['std_train']
    valid_lower = curves['avg_test'] - curves['std_test']
    valid_upper = curves['avg_test'] + curves['std_test']

    colours = CONFIG.base_palette(n=4)
    train_colour, valid_colour = colours[1], colours[2]

    plt.figure()
    plt.plot(
        train_sizes,
        curves['avg_train'],
        color=train_colour,
        marker='o',
        markersize=5,
        label='Training score',
    )
    plt.plot(
        train_sizes,
        curves['avg_test'],
        color=valid_colour,
        linestyle='--',
        marker='s',
        label='Validation score',
    )
    plt.fill_between(
        train_sizes, train_lower, train_upper, alpha=0.15, color=train_colour
    )
    plt.fill_between(
        train_sizes, valid_lower, valid_upper, alpha=0.15, color=valid_colour
    )
    plt.xlabel('Fraction of Training Set')
    plt.ylabel('Average Weighted ROC AUC')

    x_positions = np.linspace(0.6, 1.0, 6)
    plt.xticks(x_positions, _ticks(x_positions))

    # The y ticks span the full extent of both bands.
    y_positions = np.linspace(
        np.min([train_lower, valid_lower]),
        np.max([train_upper, valid_upper]),
        6,
    )
    plt.yticks(y_positions, _ticks(y_positions))

    plt.xlim([0.62, 1.01])
    plt.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, 1.1),
        ncol=2,
        fancybox=True,
        shadow=True,
    )
    plt.savefig(path_to_figure, bbox_inches='tight', dpi=CONFIG.DPI)
def plot_hassan_mod(modal='ct', show=False):
    """Bar plot of the ICC of texture features under the Hassan modifications.

    The texture features of one modality are selected from the feature
    CSV (all columns of the modality except first-order features and, for
    PET, the three PET parameters), passed to ``icc_from_hassan_modified``
    together with the modality's texture gray-level bins, and the
    resulting scores are drawn with seaborn. The figure is saved to
    './../feature_redundancy/<modal>_hassan_modifications.pdf'.

    Originally applied to CT texture features only; extended to PET.

    Args:
        modal: Modality to analyse, 'ct' (default) or 'pet'.
        show: Display the figure after saving it.

    Raises:
        ValueError: If ``modal`` is neither 'ct' nor 'pet'.
    """
    if modal not in ('ct', 'pet'):
        raise ValueError(f"modal must be 'ct' or 'pet', got {modal!r}")

    path_to_features = '../../data_source/to_analysis/original_images/all_features_original_images.csv'
    gen_graylevels(mode='ase')

    if modal == 'pet':
        path_to_figure = './../feature_redundancy/pet_hassan_modifications.pdf'
    else:
        path_to_figure = './../feature_redundancy/ct_hassan_modifications.pdf'

    X = pd.read_csv(path_to_features, index_col=0)

    # Texture features = every column of the modality that is not a
    # first-order feature (and, for PET, not a PET parameter).
    modality = X.filter(regex=modal.upper())
    first_order = modality.filter(regex='firstorder')
    texture = modality.drop(columns=first_order.columns)
    if modal == 'pet':
        pet_params = ['PETparam_SUVpeak', 'PETparam_MTV', 'PETparam_TLG']
        texture = texture.drop(columns=pet_params)

    # Gray-level bins of the modality actually being analysed.
    bins32, bins64, bins128 = get_graylevels(mod=modal, kind='texture')
    gl_bins = np.array([bins32, bins64, bins128])

    df_icc = icc_from_hassan_modified(texture, gl_bins)
    print(df_icc)

    plt.figure()
    # NOTE(review): this function reads its settings from `CONF`; confirm
    # that this is the intended configuration object for this module.
    ax = sns.barplot(
        x=df_icc.index,
        y='Score',
        hue='Kind',
        data=df_icc,
        palette=CONF.base_palette(n=6),
    )

    # Narrow every bar to a fixed width while keeping it centred.
    bar_width = 0.3
    for patch in ax.patches:
        excess = patch.get_width() - bar_width
        patch.set_width(bar_width)
        patch.set_x(patch.get_x() + excess * 0.5)

    plt.legend(
        loc='upper center',
        bbox_to_anchor=(0.5, 1.1),
        ncol=2,
        fancybox=True,
        shadow=True,
    )

    labels = np.unique(format_feature_labels(hassan_gl_transforms.keys()))
    ax.set_xticks(np.arange(len(labels)))
    ax.set_xticklabels(labels, rotation=30, ha='right')

    y_coords = np.linspace(0.0, 1.0, 6)
    ax.set_yticks(y_coords)
    ax.set_yticklabels([f'{tick:.02f}' for tick in y_coords])

    plt.ylabel('Intraclass Correlation Coefficient')
    plt.savefig(path_to_figure, bbox_inches='tight', dpi=CONF.DPI)
    if show:
        plt.show()
def pairplot_biomarkers():
    """Save the panels of a biomarker pair plot as individual PDF files.

    For every pair among the first four biomarkers one figure is written
    to '../../figures/feature_importances/pairplots/': a histogram
    ('dist_<col>_<row>.pdf') when both labels are the same, a scatter plot
    ('scatter_<col>_<row>.pdf') otherwise. Samples with y == 0 and y == 1
    are drawn in different colours.

    NB: Everything is Z-scored since the same transformation is done
    prior to classification!
    """
    show = False
    path_to_figure = '../../figures/feature_importances/pairplots/'

    _X, y = hpv_unrel_X_y(return_clinical=False)
    biomarkers = get_biomarkers(labels_only=True)
    labels = format_feature_labels(biomarkers)

    X = pd.DataFrame(
        StandardScaler().fit_transform(_X.loc[:, biomarkers]),
        index=list(_X.index),
        columns=labels,
    )

    # Row positions of the two outcome classes. flatnonzero always gives a
    # 1-D index array, even when a class holds a single sample.
    hue1 = np.flatnonzero(y == 0)
    hue2 = np.flatnonzero(y == 1)

    palette = CONFIG.base_palette(n=6)

    def _two_decimal_ticks(first, second):
        """Return six evenly spaced ticks spanning both samples, with labels."""
        lower = min(np.min(first), np.min(second))
        upper = max(np.max(first), np.max(second))
        coords = np.linspace(lower, upper, 6, dtype=float)
        return coords, [f'{tick:.02f}' for tick in coords]

    # At most a 4 x 4 grid; fewer panels when fewer biomarkers exist.
    n_panels = min(4, X.shape[1])
    for col_num in range(n_panels):
        x_label = labels[col_num]
        for row_num in range(n_panels):
            y_label = labels[row_num]

            fig = plt.figure()
            X_col_hue1 = X.iloc[hue1, col_num]
            X_col_hue2 = X.iloc[hue2, col_num]

            if x_label == y_label:
                # Plot distribution.
                ext = 'dist'
                plt.hist(X_col_hue1, color=palette[1])
                plt.hist(X_col_hue2, color=palette[2])
                print(y_label)
                # Histogram panels are saved without ticks.
                plt.xticks([], [])
                plt.yticks([], [])
            else:
                # Scatter plot.
                ext = 'scatter'
                X_row_hue1 = X.iloc[hue1, row_num]
                X_row_hue2 = X.iloc[hue2, row_num]
                plt.scatter(X_col_hue1, X_row_hue1, color=palette[1])
                plt.scatter(X_col_hue2, X_row_hue2, color=palette[2])
                plt.ylabel(y_label)
                plt.xlabel(x_label)
                plt.xticks(*_two_decimal_ticks(X_col_hue1, X_col_hue2))
                plt.yticks(*_two_decimal_ticks(X_row_hue1, X_row_hue2))

            _path_to_figure = f'{path_to_figure}{ext}_{col_num}_{row_num}.pdf'
            plt.savefig(_path_to_figure, dpi=CONFIG.DPI, bbox_inches='tight')
            if show:
                plt.show()
            # Release the figure; otherwise every panel stays open.
            plt.close(fig)