def _build_data_set(self, to_file=False):
    """Assemble the per-case biomarker table and optionally write it to CSV.

    The result of ``Trace.get_biomarkers()`` for every file in ``self.files``
    is concatenated into ``self.df_all_cases``. The ``SB`` column of the
    patient measurements workbook is inner-joined on the patient ID, which is
    the part of each case ID before the first underscore. The table is then
    re-indexed by ``(patient_ID, ID)`` and three derived columns are added:
    ``min_index``, ``min_index_ED`` and ``curv_len_inter``.

    :param to_file: when true, save the table as
        ``<output_path>/output_EDA/<indices_file>``.
    """
    df_patient_data = pd.read_excel(
        os.path.join(patient_data_path, 'PREDICT-AF_Measurements.xlsx'),  # AduHeart_PatientData_Relevant
        index_col='patient_ID', header=0)

    biomarker_frames = []
    for trace_file in self.files:
        print('case: {}'.format(trace_file))
        trace = Trace(self.source_path, case_name=trace_file, view=self.view,
                      interpolate=self.interpolate_traces)
        biomarker_frames.append(trace.get_biomarkers())

    self.df_all_cases = pd.concat(biomarker_frames)
    self.df_all_cases.index.name = 'ID'

    # Case IDs look like '<patient>_<rest>'; the prefix links a case to its patient row.
    patient_of_case = {case_id: case_id.split('_')[0] for case_id in self.df_all_cases.index}
    self.df_all_cases['patient_ID'] = self.df_all_cases.index.map(patient_of_case)
    print(self.df_all_cases)
    print(df_patient_data)

    # Inner join: cases without a matching patient row are dropped.
    joined = self.df_all_cases.join(df_patient_data.SB, how='inner', on='patient_ID')
    self.df_all_cases = joined.set_index(['patient_ID', joined.index])

    cases = self.df_all_cases
    cases['min_index'] = np.abs(cases['min_delta'] * cases['min'])
    cases['min_index_ED'] = np.abs(cases['min_delta_ED'] * cases['min_ED'])
    cases['curv_len_inter'] = np.abs(cases['min_ED'] * cases['trace_length_ED'])
    print(self.df_all_cases)

    if not to_file:
        return

    data_set_output_dir = check_directory(os.path.join(self.output_path, 'output_EDA'))
    self.df_all_cases.to_csv(os.path.join(data_set_output_dir, self.indices_file))
    print('master table saved')
def save_curvatures(self):
    """Write the curvature data of every case to ``<output_path>/curvatures/<id>.csv``."""
    curvature_dir = check_directory(os.path.join(self.output_path, 'curvatures'))

    for trace_file in self.files:
        trace = Trace(self.source_path, case_name=trace_file, view=self.view,
                      interpolate=self.interpolate_traces)
        print(trace.id)
        curvature_table = pd.DataFrame(trace.ventricle_curvature)
        target_file = os.path.join(curvature_dir, trace.id + '.csv')
        curvature_table.to_csv(target_file)
def _set_paths_and_files(self, view=None, output_path=''): # self.view = view if view is not None: # Read specified view data self.files = glob.glob(os.path.join(self.source_path, self.view, '*.CSV')) else: # Read all CSVs in the source directory self.files = glob.glob(os.path.join(self.source_path, '*.CSV')) self.files.sort() if not output_path == '': self.output_path = check_directory(output_path)
def plot_curvatures(self, coloring_scheme='curvature', plot_mean=False):
    """Produce curvature plots for every case in ``self.files``.

    :param coloring_scheme: forwarded to ``PlottingCurvature.plot_all_frames``.
    :param plot_mean: when true, only the mean-curvature plot is drawn and the
        figures go to ``<output_path>/output_curvature/mean``; otherwise the
        per-frame plots and the heatmap go to ``<output_path>/output_curvature``.
    """
    view_source = os.path.join(self.source_path, self.view)

    sub_folders = ('output_curvature', 'mean') if plot_mean else ('output_curvature',)
    figure_dir = check_directory(os.path.join(self.output_path, *sub_folders))

    for trace_file in self.files:
        trace = Trace(self.source_path, case_name=trace_file, view=self.view,
                      interpolate=self.interpolate_traces)
        print(trace.id)
        print('Points: {}'.format(trace.number_of_points))

        plotter = PlottingCurvature(source=view_source, output_path=figure_dir, ventricle=trace)
        if not plot_mean:
            plotter.plot_all_frames(coloring_scheme=coloring_scheme)
            # NOTE(review): the heatmap is assumed to belong to the per-frame
            # branch only - confirm against the intended layout.
            plotter.plot_heatmap()
        else:
            plotter.plot_mean_curvature()
def _plot_master(self):
    """Plot the '4C_' column against the '3C_' column for every biomarker.

    The master table is loaded on demand. Figures are written to
    ``<output_path>/output_master``. Two KDE distributions are drawn when the
    loaded table is ``master_table.csv``; any other table gets a labelled plot.
    """
    if self.df_master is None:
        self._try_get_data(master_table=True)

    master_dir = check_directory(os.path.join(self.output_path, 'output_master'))
    plotter = PlottingDistributions(self.df_master, '', master_dir)

    for biomarker in self.biomarkers:
        print(biomarker)
        series_4c = '4C_' + biomarker
        series_3c = '3C_' + biomarker
        if self.table_name == 'master_table.csv':
            plotter.plot_2_distributions(series_4c, series_3c, kind='kde')
        else:
            plotter.plot_with_labels(series_4c, series_3c)
def save_extemes(self, n=30):
    """Save the ``n`` top-ranked entries of every column to ``extremes.csv``.

    Each column of ``self.df_all_cases`` is sorted in descending order and
    contributes two rows, both labelled with the column name: first the index
    labels of the leading ``n`` entries, then the corresponding values. The
    table is written to ``<output_path>/<view>/output_EDA/extremes.csv``.

    :param n: number of leading entries kept per column.
    """
    self._try_get_data(data=True)

    rows = []
    row_labels = []
    for col in self.df_all_cases.columns:
        # Sort once and reuse the ranking for both the labels and the values.
        ranked = self.df_all_cases[col].sort_values(ascending=False)
        rows.append(ranked.index.values[:n])
        rows.append(ranked.values[:n])
        row_labels.extend((col, col))

    df_extremes = pd.DataFrame(rows, index=row_labels)

    _output_path = check_directory(os.path.join(self.output_path, self.view, 'output_EDA'))
    df_extremes.to_csv(os.path.join(_output_path, 'extremes.csv'))
def __init__(self, source_path='data', view='4C', output_path='data', indices_file='indices_all_cases.csv',
             interpolate_traces=None):
    """Store the configuration and collect the trace files.

    :param source_path: folder searched for ``*.CSV`` trace files.
    :param view: view name; stored as ``self.view``.
    :param output_path: output folder, passed through ``check_directory``.
    :param indices_file: file name used when the per-case table is saved.
    :param interpolate_traces: forwarded to ``Trace`` as ``interpolate``.
    """
    # Configuration supplied by the caller
    self.view = view
    self.source_path = source_path
    self.output_path = check_directory(output_path)
    self.indices_file = indices_file
    self.interpolate_traces = interpolate_traces

    # Trace files, in sorted order
    self.files = sorted(glob.glob(os.path.join(self.source_path, '*.CSV')))

    # State that is filled in later by other methods
    self.df_all_cases = None
    self.df_master = None
    self.curv = None
    self.biomarkers = None
    self.table_name = None
def _plot_data(self):
    """Plot every biomarker's distribution and every pair of biomarkers.

    The per-case table is loaded on demand. Figures are written to
    ``<output_path>/<view>/output_EDA``. Pairs are drawn as two KDE
    distributions when the loaded table is ``master_table.csv``; any other
    table gets a labelled plot.
    """
    if self.df_all_cases is None:
        self._try_get_data(data=True)

    eda_dir = check_directory(os.path.join(self.output_path, self.view, 'output_EDA'))
    plotter = PlottingDistributions(self.df_all_cases, '', eda_dir)

    # Single-biomarker distributions
    for biomarker in self.biomarkers:
        plotter.set_series(biomarker)
        plotter.plot_distribution()

    # Pairwise plots
    for first, second in combinations(self.biomarkers, 2):
        if self.table_name == 'master_table.csv':
            plotter.plot_2_distributions(first, second, kind='kde')
        else:
            plotter.plot_with_labels(first, second)
def plots_wt_and_curvature_vs_markers(self, save_figures=False):
    """Scatter every basic factor against the curvature and wall-thickness markers.

    One figure is produced per (x label, factor) pair from
    ``self.df_comparison``. For the two ``basal/mid`` ratio columns the points
    are coloured by the ``SB`` column and a dashed vertical reference line is
    drawn at 1.4; for the other markers the points are coloured by the marker
    itself.

    :param save_figures: when true, save each figure as a PNG in
        ``<output_path>/plots``; otherwise show it interactively.
    """
    plot_dir = check_directory(os.path.join(self.output_path, 'plots'))

    # The membership test must use the same spelling as the plotted labels;
    # the former 'basal_mid' spelling never matched, leaving this branch dead.
    ratio_labels = (r'PLAX basal/mid', r'4C basal/mid')
    x_labels = ['min_ED', 'avg_min_basal_curv', r'PLAX basal/mid', r'4C basal/mid', 'avg_basal_ED']

    for x_label in x_labels:
        is_ratio = x_label in ratio_labels
        for y_label in self.FACTORS_BASIC:
            title = 'Relation of {} to {}'.format(y_label, x_label)
            if is_ratio:
                ax = self.df_comparison.plot(x=x_label, y=y_label, c='SB', kind='scatter', legend=True,
                                             colorbar=True, cmap='winter', title=title)
                # Draw the reference line on the scatter's own axes; calling
                # plt.axvline before the plot exists would target another figure.
                ax.axvline(1.4, linestyle='--', c='k')
            else:
                self.df_comparison.plot(x=x_label, y=y_label, c=x_label, kind='scatter', legend=True,
                                        colorbar=True, cmap='autumn', title=title)

            if save_figures:
                plt.savefig(os.path.join(plot_dir, r'{} vs {} HTNs.png'.format(y_label,
                                                                               x_label.replace('/', '_'))))
            else:
                plt.show()
            plt.close()
def linear_regression_basic_factors(self, to_file=False, show_plots=False):
    """Regress every basic factor on each marker and collect fit statistics.

    For each (marker, factor) pair taken from ``self.df_comparison`` a
    univariate ``LinearRegression`` is fitted, and the coefficients, R2, mean
    squared error, Spearman rho and p-value, and the Pearson result are
    recorded.

    :param to_file: when true, write the table to
        ``<output_path>/Linear_regression_results.csv``.
    :param show_plots: when true, draw a labelled plot for every pair via
        ``PlottingDistributions``.
    :return: DataFrame with one row per (marker, factor) pair.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import mean_squared_error, r2_score

    markers = ['Average septal curvature [cm-1]',
               r'Wall thickness ratio in 4CH view',
               r'Wall thickness ratio in PLAX view']

    list_results = []
    for marker in markers:
        # The predictor depends on the marker only, so build it once per marker.
        x = self.df_comparison[marker].values.reshape(-1, 1)
        x_flat = x.ravel()
        for factor in self.FACTORS_BASIC:
            y = self.df_comparison[factor].values.reshape(-1, 1)

            lr = LinearRegression()
            lr.fit(x, y)
            y_pred = lr.predict(x)

            # The correlation tests take 1-D samples; column vectors are
            # handled inconsistently across scipy versions.
            y_flat = y.ravel()
            rho_sp, p_sp = spearmanr(x_flat, y_flat)
            r_pe = pearsonr(x_flat, y_flat)

            list_results.append({'marker': marker, 'factor': factor, 'coefficients': lr.coef_,
                                 'R2': r2_score(y, y_pred), 'mse': mean_squared_error(y, y_pred),
                                 'spearmanr': rho_sp, 'spearmanp': p_sp, 'pearsonr': r_pe})

            if show_plots:
                plots = PlottingDistributions(self.df_comparison, 'min',
                                              check_directory(os.path.join(self.output_path, 'plots')))
                plots.plot_with_labels(series1=marker, series2=factor, w_labels=False)

    df_results = pd.DataFrame(list_results)
    if to_file:
        df_results.to_csv(os.path.join(self.output_path, 'Linear_regression_results.csv'))
    # Previously the table was discarded unless written to disk.
    return df_results
def plot_curv_vs_wt(self, save_figures=False, w_reg=False):
    """Scatter the two wall-thickness ratios against each other, coloured by curvature.

    The PLAX ratio is on the x axis and the 4CH ratio on the y axis. Points
    are coloured by the 'Average septal curvature [cm-1]' column using a
    combined orange/blue colormap, and dashed reference lines are drawn at 1.4
    on both axes.

    :param save_figures: when true, save the figure as
        ``<output_path>/plots/Ratios_curvature.svg``; otherwise show it.
    :param w_reg: not used by this method; kept so existing calls still work.
    """
    from matplotlib.colors import ListedColormap

    plot_dir = check_directory(os.path.join(self.output_path, 'plots'))

    # NOTE: the exploratory per-label scatter plots and threshold counts that
    # used to be commented out here were removed; see version control history.

    # matplotlib.cm.get_cmap was removed in matplotlib 3.9; pyplot.get_cmap
    # takes the same (name, lut) arguments and is kept for compatibility.
    top = plt.get_cmap('Oranges_r', 128)
    bottom = plt.get_cmap('Blues', 128)
    newcolors = np.vstack((top(np.linspace(0, 1, 512)),
                           bottom(np.linspace(0, 1, 512))))
    newcmp = ListedColormap(newcolors, name='OrangeBlue')

    self.df_comparison.plot(x=r'Wall thickness ratio in PLAX view', y=r'Wall thickness ratio in 4CH view',
                            c='Average septal curvature [cm-1]', kind='scatter', legend=False, s=200,
                            colorbar=True, cmap=newcmp, title='', figsize=(9, 7.2))

    plt.xlabel(r'Wall thickness ratio in PLAX view', fontsize=23)
    plt.ylabel(r'Wall thickness ratio in 4CH view', fontsize=23)
    plt.xticks(fontsize=16)
    plt.yticks(fontsize=16)
    # Reference lines at 1.4, clipped via ymax/xmax so they stop part-way across the axes.
    plt.axvline(1.4, ymax=0.47, linestyle='--', c='k')
    plt.axhline(1.4, xmax=0.462, linestyle='--', c='k')
    plt.xlim((0.7, 2.2))
    plt.ylim((0.7, 2.2))
    plt.tight_layout()

    # The second axes of the figure is the colorbar added by the scatter plot.
    f = plt.gcf()
    f.get_axes()[1].set_ylabel('Average septal curvature $[dm^{-1}]$', fontsize=23)
    f.get_axes()[1].tick_params(labelsize=16)

    if save_figures:
        plt.savefig(os.path.join(plot_dir, r'Ratios_curvature.svg'))
    else:
        plt.show()
view = '' segment = '' df_range = var.calculate_sem_single_index(o2=o2) var.bland_altman_plot_single_index(o2=o2) # ranges = pd.concat((ranges, df_range), axis=0) # for view in ['PLAX', '4C']: # var.bland_altman_plot_multi_index(o2=o2, view=view, segment='ratio') # df_range = var.calculate_sem_multi_index(o2=o2, view=view, segment='ratio') # ranges = pd.concat((ranges, df_range), axis=0) # ranges.to_csv(os.path.join(var.output_path, 'ranges.csv')) # print(ranges) # STRAIN ANALYSIS # patient_data_path = os.path.join('C:\Data\ProjectCurvature\Analysis\Output_HTN\Statistics') curvature_results = os.path.join('C:/', 'Data', 'ProjectCurvature', 'Analysis', 'Output') output = check_directory(os.path.join('C:\Data\ProjectCurvature\Analysis\Output_HTN\Statistics\plots', 'EDA')) # measurements = 'AduHeart_Measurements.xlsx' # twodstrain = 'AduHeart_Strain_MW.xlsx' # curvature = 'master_table_full.csv' # patient_info = 'AduHeart_PatientData_Full.xlsx' merged_data = 'Measurements_and_2DstrainPlotting.csv' # # anal = StrainAnalysis(patient_data_path, curvature_results, output, merged_data_filename=merged_data) # anal.plots_wt_and_curvature_vs_markers(True) # anal.plot_curv_vs_wt(True) # anal.get_statistics() # anal.linear_regression_basic_factors(False, show_plots=True) # STATANALYSIS