def load_data(self):
    """Load the selected data file, then transform, plot and cluster it.

    Positions the 3D scatter and 2D ternary windows relative to the screen
    size, reads the file through ``self.data.fcs_read`` using the sample size
    typed into ``self.clusterSampleSize``, fills the parameter table,
    transforms the data, draws both plots and finally runs
    ``self.cluster_data()``. The progress bar is advanced after each of the
    four stages.

    An error dialog is shown (and nothing else happens) when the sample size
    is not a valid integer or when the file cannot be read.
    """
    screen_width = self.screen_size.width()
    screen_height = self.screen_size.height()

    # Both plot windows are square: the side length is derived from the
    # screen *width* for the horizontal and the vertical dimension alike.
    window_x = int(0.60 * screen_width)
    window_side = int(0.20 * screen_width)

    self.scatter3DWindow.move(window_x, int(0.05 * screen_height))
    self.scatter3DWindow.resize(window_side, window_side)

    self.tern2DWindow.move(window_x, int(0.45 * screen_height))
    self.tern2DWindow.resize(window_side, window_side)

    view.start_progress_bar(self.progressBar, start=0, stop=4)

    self.data.screen_size = self.screen_size
    self.data.OS = self.OS

    # reinitialize auto_cluster data
    # NOTE(review): this stores the pd.Series *class*, not an empty instance.
    # Left untouched because code outside this method may rely on it - confirm
    # whether an empty Series was intended.
    self.data.tab_cluster_data = pd.Series
    self.data.auto_cluster_idx = []

    # load fcs file
    try:
        sample_size = int(self.clusterSampleSize.text())
    except ValueError:
        # An empty or non-numeric entry would otherwise raise out of the
        # handler; report it the same way as other load failures.
        helper.error_message(self.error_dialog,
                             'Sample size must be a whole number')
        return

    if not self.data.fcs_read(sample_size):
        helper.error_message(self.error_dialog,
                             'Could not load .fcs or .csv file')
        return

    # fill parameter table
    self.data.param_combo_box_list = view.init_param_table(self.parameterTable,
                                                           self.data.params)
    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    # transform data
    self.data.transform_data()

    # print successful load and display number of cells
    self.fileLabel.setText(
        self.data.sample_name + '\n'
        + '{} total cells ({} sampled cells)'.format(self.data.data_size,
                                                     self.data.raw.shape[0]))
    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    # initialize 2D and 3D zbow graph
    self.data.zbow_3d_plot(self.scatter3DWindow,
                           scale=self.scatterScaleOption.currentIndex(),
                           color=self.scatterColorOption.currentIndex())
    self.data.zbow_2d_plot(self.tern2DWindow,
                           scale=self.ternScaleOption.currentIndex(),
                           color=self.ternColorOption.currentIndex())
    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    self.cluster_data()
    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    # freshly loaded data has had no outlier filtering applied yet
    self.data.outliers_removed = False
def make_cluster_plots(self):
    """Build one combined cluster graph from several cluster summary files.

    Asks the user for one or more ``*Summary.csv`` files. For every file a
    box plot of the ``percentage`` column (rows whose ``id`` is ``'noise'``
    are dropped) is drawn at its own x position, overlaid with one
    horizontally jittered marker per cluster coloured by the cluster's
    ``mean R`` / ``mean G`` / ``mean B`` values. The figure is written as
    ``combined_cluster_graph.png`` and ``.eps`` next to the first selected
    file. An error dialog is shown when no file is selected.
    """
    if isinstance(self.data.path_name, str):
        default_path = self.data.path_name
    else:
        default_path = '~/'

    # getOpenFileNames returns (list_of_paths, selected_filter)
    selected_files = QtWidgets.QFileDialog.getOpenFileNames(
        caption='Select cluster summary files',
        directory=default_path,
        filter='*Summary.csv')[0]
    QtWidgets.QApplication.processEvents()

    if not selected_files:
        helper.error_message(self.error_dialog, 'No files selected')
        return

    file_count = len(selected_files)
    view.start_progress_bar(self.progressBar, start=0, stop=file_count - 1)

    path_name = os.path.dirname(os.path.abspath(selected_files[0]))

    cluster_figure, cluster_ax = plt.subplots()
    cluster_figure.set_dpi(300)

    sample_name = []

    for position, file_path in enumerate(selected_files, start=1):
        sample_name.append(os.path.splitext(os.path.basename(file_path))[0])

        tab_data = pd.read_csv(file_path)
        tab_data = tab_data[tab_data['id'] != 'noise']

        bar_data = tab_data['percentage']
        # copy so that adding the alpha column below does not write into a
        # slice of tab_data (avoids pandas' SettingWithCopyWarning)
        bar_color = tab_data[['mean R', 'mean G', 'mean B']].copy()

        cluster_ax.boxplot(bar_data, positions=[position], sym='', vert=True,
                           medianprops=dict(color='k'))

        # Horizontal jitter: the half-width is position / percentage**2,
        # limited to the range [0.02, 0.2], so large clusters stay close to
        # the box centre and small ones are spread out.
        x_coord = [position] * len(bar_data)
        bar_data_square = [value ** 2 for value in bar_data]
        x_fudge_factor = np.clip(np.divide(x_coord, bar_data_square), 0.02, 0.2)
        x_fudge_choice = [uniform(-width, width) for width in x_fudge_factor]
        x_coord = np.array(x_coord) + np.array(x_fudge_choice)

        bar_color['alpha'] = 0.7
        marker_colors = [tuple(row) for row in bar_color.values]

        cluster_ax.scatter(x_coord, bar_data, s=100, c=marker_colors)

        view.update_progress_bar(self.progressBar)
        QtWidgets.QApplication.processEvents()

    cluster_ax.set_ylim(0, 100)
    cluster_ax.set_xlim(0.5, file_count + 0.5)

    # tick labels: file name without the '_Summary' suffix, max. 20 characters
    tick_labels = [name.replace('_Summary', '')[0:20] for name in sample_name]
    cluster_ax.set_xticks(list(range(1, file_count + 1)))
    cluster_ax.set_xticklabels(tick_labels)
    cluster_ax.tick_params(axis='x', labelsize='x-small')

    cluster_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

    # 'bbox_inches' is the savefig keyword for a tight bounding box; the
    # previously used 'Bbox' is not a valid savefig argument.
    output_base = os.path.join(path_name, 'combined_cluster_graph')
    for extension in ('.png', '.eps'):
        cluster_figure.savefig(output_base + extension, dpi=300,
                               transparent=True, pad_inches=0,
                               bbox_inches='tight')

    plt.close(cluster_figure)
def save_data(self):
    """Write the cluster summary, cluster solution, metadata and plots to disk.

    Asks the user for an output directory, creates the expected
    sub-directories in it and writes:

    * ``cluster_summaries/<sample>_Summary.csv`` - ``self.data.tab_cluster_data``
    * ``cluster_solutions/<sample>_cluster_solution.csv`` - the raw (or
      outlier-filtered) data with a leading ``clusterID`` column
    * ``cluster_solutions/<sample>_metadata.csv`` - run parameters and stats

    and then delegates to ``self.data.make_output_plots`` for the figures.
    An error dialog is shown when no directory is chosen.
    """
    from datetime import datetime

    num_of_progress_bar_steps = 4 + len(self.data.tab_cluster_data['id'])
    view.start_progress_bar(self.progressBar, start=0,
                            stop=num_of_progress_bar_steps)

    # get directory to save to
    self.data.save_folder = QtWidgets.QFileDialog.getExistingDirectory(
        caption='Select directory to save output',
        directory=os.path.dirname(self.data.file_name))

    if not self.data.save_folder:
        helper.error_message(self.error_dialog,
                             'Could not retrieve directory to save to')
        return

    save_folder = self.data.save_folder
    sample_name = self.data.sample_name

    # make subdirectories if they don't exist
    for subdirectory in ('ternary_plots',
                         'cluster_backgates',
                         'bar_graphs_and_cluster_plots',
                         'cluster_solutions',
                         'cluster_summaries'):
        os.makedirs(os.path.join(save_folder, subdirectory), exist_ok=True)

    self.data.tab_cluster_data.to_csv(
        os.path.join(save_folder, 'cluster_summaries',
                     sample_name + '_Summary.csv'),
        index=False, header=True)

    # Work on a copy: DataFrame.insert modifies the frame in place, so
    # inserting into self.data.raw / raw_filtered directly would leave a
    # 'clusterID' column in the stored data and make a second save fail with
    # "cannot insert clusterID, already exists".
    if self.data.outliers_removed:
        cluster_solution = self.data.raw_filtered.copy()
        cluster_solution.insert(0, 'clusterID', self.data.cluster_data_idx)
    else:
        cluster_solution = self.data.raw.copy()
        cluster_solution.insert(loc=0, column='clusterID',
                                value=pd.Series(self.data.cluster_data_idx))

    cluster_solution.to_csv(
        path_or_buf=os.path.join(save_folder, 'cluster_solutions',
                                 sample_name + '_cluster_solution.csv'),
        index=False, header=True)

    metadata_output = {
        'fcs_file': self.data.file_name,
        'date_and_time': datetime.now(),
        'original_sample_size': self.data.data_size,
        'sample_size': cluster_solution.shape[0],
        'HDBSCAN_min_cluster_size': self.clusterMinClusterSize.text(),
        'HDBSCAN_min_samples': self.clusterMinSamples.text(),
        'noise_cluster_idx': self.data.noise_cluster_idx,
        'red_only_idx': self.data.red_only_cluster_idx,
        'gini_coefficient': self.data.gini,
        'shannon_entropy': self.data.shannon,
    }

    # the copy is no longer needed; drop it before the plots are generated
    del cluster_solution

    # one "key,value" row per metadata entry
    metadata_output = pd.DataFrame.from_dict(metadata_output, orient='index')
    metadata_output.to_csv(
        path_or_buf=os.path.join(save_folder, 'cluster_solutions',
                                 sample_name + '_metadata.csv'),
        index=True, header=False)

    view.update_progress_bar(self.progressBar)
    QtWidgets.QApplication.processEvents()

    self.data.make_output_plots(scale=self.ternScaleOption.currentIndex(),
                                color=self.ternColorOption.currentIndex(),
                                progress_bar=self.progressBar)
def make_output_plots(self, scale, color, progress_bar):
    """Render and save all output figures for the current sample.

    Written below ``self.save_folder``:

    * ``ternary_plots/<sample>.png|.eps`` - ternary scatter with KDE contours
    * ``bar_graphs_and_cluster_plots/<sample>bar_graph.png|.eps`` - stacked
      bar of cluster percentages
    * ``bar_graphs_and_cluster_plots/<sample>cluster_graph.png|.eps`` - box
      plot of cluster percentages with jittered, cluster-coloured markers
    * one backgate plot per cluster via ``self.make_backgate_plot``

    Args:
        scale: index of the transform used for point positions
            (0 = custom, 1 = default, 2 = linear).
        color: index of the colouring scheme
            (0 = custom, 1 = default, 2 = cluster color, 3 = linear).
        progress_bar: widget handed to ``view.update_progress_bar`` after
            each figure and after each backgate plot.

    Raises:
        ValueError: if ``scale`` or ``color`` is not one of the values above.
    """
    # get scale data: scale_list = ['custom', 'default', 'linear']
    # (.values replaces DataFrame.as_matrix(), which newer pandas removed)
    if scale == 0:
        scale_data = self.custom_transformed.values
        contour_data = self.custom_ternary.values
    elif scale == 1:
        scale_data = self.default_transformed.values
        contour_data = self.default_ternary.values
    elif scale == 2:
        scale_data = self.linear_transformed.values
        contour_data = self.linear_ternary.values
    else:
        raise ValueError('unsupported scale option: {!r}'.format(scale))

    # get color data: color_list = ['custom', 'default', 'cluster color', 'linear']
    if color == 0:
        color_data = self.custom_transformed.values
    elif color == 1:
        color_data = self.default_transformed[['RFP', 'YFP', 'CFP']].values
    elif color == 2:
        if self.tab_cluster_data.empty:
            color_data = helper.distinguishable_colors(1)
        else:
            pseudo_color = helper.distinguishable_colors(
                self.tab_cluster_data['id'].count())
            pseudo_color[self.noise_cluster_idx] = "#646464"
            # one colour per data point, looked up through its cluster index
            color_data = [pseudo_color[self.cluster_data_idx[row]]
                          for row in range(scale_data.shape[0])]
    elif color == 3:
        color_data = self.linear_transformed[['RFP', 'YFP', 'CFP']].values
    else:
        raise ValueError('unsupported color option: {!r}'.format(color))

    # this assures that R + G + B = scale, which is required for the ternary
    # library
    row_totals = scale_data.sum(axis=1)
    scale_data = scale_data / row_totals[:, None]

    ###### TERNARY PLOT WITH CONTOUR ######
    # get Gaussian kernel for contour plot, evaluated on a 200 x 200 grid
    # over the unit square
    X, Y = np.mgrid[0:1:200j, 0:1:200j]
    positions = np.vstack([X.ravel(), Y.ravel()])
    values = np.vstack([contour_data[:, 0], contour_data[:, 1]])
    kernel = sd.gaussian_kde(values)
    Z = np.reshape(kernel(positions).T, X.shape)

    ternary_scale = 1
    tern_figure, tern_plot = ternary.figure(scale=ternary_scale)
    # this is proper scaling to approximate an equilateral triangle
    tern_figure.set_size_inches(5.37, 5)
    tern_figure.set_dpi(300)

    tern_plot.boundary(linewidth=1.0)
    tern_plot.gridlines(multiple=0.1, color='grey')
    tern_plot.scatter(scale_data, marker='o', color=color_data, s=2)
    tern_plot.clear_matplotlib_ticks()

    # drawn on the current axes, i.e. the ternary figure created just above
    plt.contour(X, Y, Z, colors='k', alpha=0.6, linewidths=1)

    # 'bbox_inches' is the savefig keyword for a tight bounding box; the
    # previously used 'Bbox' is not a valid savefig argument.
    ternary_filename = os.path.join(self.save_folder, 'ternary_plots',
                                    self.sample_name)
    for extension in ('.png', '.eps'):
        tern_figure.savefig(ternary_filename + extension, dpi=300,
                            transparent=True, pad_inches=0,
                            bbox_inches='tight')
    plt.close(tern_figure)

    view.update_progress_bar(progress_bar)
    QtWidgets.QApplication.processEvents()

    ######### BAR GRAPH AND CLUSTER PLOT #########
    bar_data = self.tab_cluster_data['percentage']
    # copy so that adding the alpha column further down does not write into a
    # slice of tab_cluster_data (avoids pandas' SettingWithCopyWarning)
    bar_color = self.tab_cluster_data[['mean R', 'mean G', 'mean B']].copy()

    bar_figure, bar_ax = plt.subplots()
    bar_figure.set_size_inches(3, 6)
    bar_figure.set_dpi(300)

    # stacked bar: every cluster is drawn on top of the running total
    running_total = 0
    for j in range(len(bar_data)):
        b_col = bar_color.iloc[j].values.tolist()
        # set noise cluster to total transparency
        # NOTE(review): as in the original code, the first bar (j == 0) is
        # always drawn opaque, even if it is the noise cluster - confirm
        # whether that is intended.
        if j != 0 and j == self.noise_cluster_idx:
            b_col.append(0)
        else:
            b_col.append(1)
        bar_ax.bar(0, bar_data.iloc[j], bottom=running_total, color=b_col)
        running_total = running_total + bar_data.iloc[j]

    bar_ax.set_ylim(0, 100)
    bar_ax.set_xticks([])

    bar_filename = os.path.join(self.save_folder,
                                'bar_graphs_and_cluster_plots',
                                self.sample_name)
    for extension in ('.png', '.eps'):
        bar_figure.savefig(bar_filename + 'bar_graph' + extension, dpi=300,
                           transparent=True, pad_inches=0,
                           bbox_inches='tight')
    plt.close(bar_figure)

    view.update_progress_bar(progress_bar)
    QtWidgets.QApplication.processEvents()

    cluster_figure, cluster_ax = plt.subplots()
    cluster_figure.set_size_inches(3, 6)
    cluster_figure.set_dpi(300)

    cluster_ax.boxplot(bar_data, sym='', vert=True,
                       medianprops=dict(color='k'))

    # Horizontal jitter around x = 1: the half-width is 1 / percentage**2,
    # limited to the range [0.02, 0.2], so large clusters stay close to the
    # box centre and small ones are spread out.
    x_coord = [1] * len(bar_data)
    bar_data_square = [value ** 2 for value in bar_data]
    x_fudge_factor = np.clip(np.divide(x_coord, bar_data_square), 0.02, 0.2)
    x_fudge_choice = [uniform(-width, width) for width in x_fudge_factor]
    x_coord = np.array(x_coord) + np.array(x_fudge_choice)

    bar_color['alpha'] = 0.7
    marker_colors = [tuple(row) for row in bar_color.values]

    cluster_ax.scatter(x_coord, bar_data, s=150, c=marker_colors)

    cluster_ax.set_ylim(0, 100)
    cluster_ax.yaxis.set_major_locator(ticker.MultipleLocator(10))

    for extension in ('.png', '.eps'):
        cluster_figure.savefig(bar_filename + 'cluster_graph' + extension,
                               dpi=300, transparent=True, pad_inches=0,
                               bbox_inches='tight')
    plt.close(cluster_figure)

    view.update_progress_bar(progress_bar)
    QtWidgets.QApplication.processEvents()

    ######### BACKGATE PLOTS #####
    for cluster_id in set(self.cluster_data_idx):
        in_cluster = self.cluster_data_idx == cluster_id
        meanG = np.mean(self.default_transformed.loc[in_cluster, 'YFP'])
        meanB = np.mean(self.default_transformed.loc[in_cluster, 'CFP'])

        # Quadrant from the mean YFP (G) / CFP (B) values relative to 0.5.
        # A mean of exactly 0.5 is counted with the upper half so that a
        # quadrant is always assigned (previously it was left unset or kept
        # the value of the preceding cluster).
        if meanB >= 0.5:
            quadrant = 2 if meanG >= 0.5 else 1
        else:
            quadrant = 4 if meanG >= 0.5 else 3

        self.make_backgate_plot(cluster_id, quadrant)
        view.update_progress_bar(progress_bar)
        QtWidgets.QApplication.processEvents()