def post_collection(self, name, analysis_ids):
    self.load_dataframes()
    parent_collections = [
        get_collection(current_user, collection_id)
        for collection_id in self.loaded_collection_ids
    ]
    label_df = self._label_df
    # merge collection attributes
    if len(self.loaded_collection_ids) > 1:
        collection_lengths = [
            len(self._label_df[self._label_df.original_collection_id == collection_id])
            for collection_id in self.loaded_collection_ids
        ]
        collection_attrs = [collection.get_file_attributes() for collection in parent_collections]
        all_attr_keys = set.union(*[set(collection_attr.keys()) for collection_attr in collection_attrs])
        attr_values = {
            attr_key: [collection_attr.get(attr_key) for collection_attr in collection_attrs]
            for attr_key in all_attr_keys
        }
        attrs = {}
        for attr_key, attr_values_ in attr_values.items():
            valid_values = [v for v in attr_values_ if v is not None]
            if len(valid_values) == 1:
                # only one parent defines this attribute: keep it as a single file attribute
                attrs[attr_key] = valid_values[0]
            else:
                # multiple values: expand into a per-row label column, keeping each
                # collection's value (or None) aligned with the rows it contributed
                vals = [[v] * length for v, length in zip(attr_values_, collection_lengths)]
                label_df[attr_key] = [item for sublist in vals for item in sublist]
    else:
        attrs = get_collection(current_user, self.loaded_collection_ids[0]).get_file_attributes()
    filename = os.path.join(self.root_dir, 'processed.h5')
    attrs['processing_log'] = self.processing_log
    description = '\n\n'.join([collection.description for collection in parent_collections]
                              + [self.processing_log])
    new_data = {
        'name': name,
        'description': description,
        'analysis_ids': analysis_ids,
        'parent_collection_id': self.loaded_collection_ids[0],
        'group_can_read': all(collection.group_can_read for collection in parent_collections),
        'all_can_read': all(collection.all_can_read for collection in parent_collections),
        'group_can_write': all(collection.group_can_write for collection in parent_collections),
        'all_can_write': all(collection.all_can_write for collection in parent_collections)
    }
    self.write_collection(self._numeric_df, label_df, attrs, filename)
    return upload_collection(current_user, filename, new_data)
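# A minimal standalone sketch (not part of the application; the attribute dicts and
# row counts below are made up) of the merge rule above: an attribute defined by
# exactly one parent collection is kept as a single file attribute, while an
# attribute with several values is expanded to one label per contributed row.
def _merge_attrs_sketch():
    collection_attrs = [{'solvent': 'D2O', 'pH': 7.0}, {'solvent': 'D2O', 'pH': 7.4}, {'study': 'A'}]
    collection_lengths = [2, 3, 1]  # rows contributed by each parent collection
    all_attr_keys = set.union(*[set(a.keys()) for a in collection_attrs])
    attrs, label_columns = {}, {}
    for key in all_attr_keys:
        per_collection = [a.get(key) for a in collection_attrs]
        valid = [v for v in per_collection if v is not None]
        if len(valid) == 1:
            attrs[key] = valid[0]
        else:
            label_columns[key] = [v for v, n in zip(per_collection, collection_lengths)
                                  for _ in range(n)]
    print(attrs)          # e.g. {'study': 'A'}
    print(label_columns)  # e.g. 'solvent' and 'pH' each expanded to 6 per-row values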
def results_filename(self):
    if self.results_collection_id is not None:
        try:
            return get_collection(current_user, self.results_collection_id).filename
        except Exception:
            return None
    return None
def get_collections(self, collection_ids):
    super().get_collections(collection_ids)
    try:
        self.processing_log = get_collection(current_user, collection_ids[0]).get_attr('processing_log')
    except Exception as e:
        print(e)
        self.processing_log = ''
    x = [float(i) for i in self._numeric_df.columns]
    # chemical-shift axis runs high-to-low, so the range is stored reversed
    self.x_axis_range = [max(x), min(x)]
    y_max = np.max(self._numeric_df.values)
    self.y_axis_range = [-0.05 * y_max, 1.05 * y_max]
def render_collection(collection_id=None):
    try:
        current_user = get_current_user()
        collection = get_collection(current_user, collection_id)
        if request.method == 'DELETE':
            delete_collection(current_user, collection_id)
            return redirect(url_for('collections.render_collection_list'))
        if request.method == 'POST':
            dataset_name = request.form.get('dataset_name')
            dataset_type = request.form.get('dtype')
            dt.collections.create_new_label_dataset(current_user, collection, dataset_name, dataset_type)
        return render_template('pages/collection_entry.html',
                               page_data=CollectionPageData(current_user, collection))
    except Exception as e:
        return handle_exception_browser(e)
def _save_loadings(group_key, name):
    with h5py.File(self.results_filename, 'r') as results_file:
        group = results_file[group_key]
        values = [
            np.array(group['pls']['x_loadings']),
            np.array(group['feature_p_values']),
            np.array(group['opls']['P_ortho'])
        ]
        columns = ['x_loadings', 'p_value'] + [
            f'P_ortho[{i}]' for i in range(values[2].shape[1])
        ]
        input_collection_id = results_file.attrs.get('input_collection_id')
        if input_collection_id is not None:
            try:
                input_collection = get_collection(current_user, input_collection_id)
                x = input_collection.get_dataset('x')
                try:
                    x_min = input_collection.get_dataset('x_min')
                    x_max = input_collection.get_dataset('x_max')
                except Exception:
                    x_min = x_max = None
            except Exception:
                x_min = x_max = x = None
        else:
            x_min = x_max = x = None
        df = pd.DataFrame(np.column_stack(values), columns=columns)
        if x is not None and len(x) == len(df):
            df.index = x
            df.index.name = 'x'
        if x_min is not None and len(x_min) == len(df):
            df['x_min'] = x_min
            df['x_max'] = x_max
            df = df[['x_min', 'x_max'] + columns]
        if file_format == 'csv':
            df.to_csv(name)
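# A small sketch (made-up arrays, independent of the HDF5 file) of the frame that
# _save_loadings writes: one row per feature, indexed by chemical shift when the
# input collection provides 'x', with optional bin boundaries leading the
# loading / p-value / orthogonal-loading columns.
def _loadings_frame_sketch():
    x_loadings = np.array([[0.12], [-0.30]])
    p_values = np.array([[0.01], [0.20]])
    P_ortho = np.array([[0.05, 0.02], [0.01, -0.04]])  # one column per orthogonal component
    columns = ['x_loadings', 'p_value'] + [f'P_ortho[{i}]' for i in range(P_ortho.shape[1])]
    df = pd.DataFrame(np.column_stack([x_loadings, p_values, P_ortho]), columns=columns)
    df.index = [3.421, 1.287]  # chemical shifts from the input collection's 'x'
    df.index.name = 'x'
    df['x_min'], df['x_max'] = [3.40, 1.27], [3.44, 1.30]
    df = df[['x_min', 'x_max'] + columns]
    print(df.to_csv())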
def get_plot(self, queries, group_by, labels, theme, bin_collection_id, legend_style, background_color):
    labels = labels or []
    self.load_dataframes()
    if bin_collection_id is not None:
        # shade alternating bin regions behind the spectra
        bin_collection = get_collection(current_user, bin_collection_id)
        x_mins = bin_collection.get_dataset('x_min').ravel().tolist()
        x_maxes = bin_collection.get_dataset('x_max').ravel().tolist()
        colors = [DEFAULT_PLOTLY_COLORS[i % 2] for i in range(len(x_mins))]
        shapes = [
            go.layout.Shape(type='rect', xref='x', yref='paper',
                            x0=x_min, y0=0, x1=x_max, y1=1,
                            fillcolor=color, opacity=0.2, layer='below', line_width=0)
            for x_min, x_max, color in zip(x_mins, x_maxes, colors)
        ]
    else:
        shapes = []
    if theme == 'plotly_dark' and background_color != 'rgba(255,255,255,1)':
        axis_line_style = {
            'zerolinecolor': '#375A7F',  # darkly primary
            'gridcolor': '#444444'  # darkly secondary
        }
    else:
        axis_line_style = {
            'zerolinecolor': '#2C3E50',  # flatly primary
            'gridcolor': '#95A5A6'  # flatly secondary
        }
    layout = go.Layout(
        height=700,
        font={'size': 16},
        margin={'t': 25, 'l': 25, 'b': 25, 'r': 25},
        template=theme,
        plot_bgcolor=background_color,
        paper_bgcolor=background_color,
        xaxis={'title': 'Chemical Shift (ppm)', 'autorange': 'reversed', **axis_line_style},
        yaxis={'title': 'Intensity', **axis_line_style},
        shapes=shapes,
        showlegend=legend_style in ('full', 'groups')
    )
    color_indices = [self._label_df.query(query).index for query in queries]
    if len(color_indices) > len(DEFAULT_PLOTLY_COLORS):
        # repeat the default color list until it covers every query group
        colors = []
        while len(colors) < len(color_indices):
            colors += DEFAULT_PLOTLY_COLORS
    else:
        colors = DEFAULT_PLOTLY_COLORS
    colors = colors[:len(color_indices)]
    x = self._numeric_df.columns.values.astype(float)
    figure = go.Figure(layout=layout)
    if legend_style in ('full', 'groups'):
        # dummy series to use as a stand-in for a legend title
        figure.add_trace(
            go.Scatter(x=[0], y=[0], name=','.join(group_by), mode='markers',
                       marker={'opacity': 0, 'size': 0, 'color': 'rgba(0,0,0,0)'})
        )
        for query, color in zip(queries, colors):
            # dummy series to label the color of each query group
            figure.add_trace(
                go.Scatter(x=[0], y=[0],
                           name=','.join(re.findall(r'["](\w+)["]', query)),  # pretty kludgy
                           mode='lines', marker={'color': color}, legendgroup=query)
            )
        # dummy series to provide space between the color key and the "heading"
        figure.add_trace(
            go.Scatter(x=[0], y=[0], name='', mode='markers',
                       marker={'opacity': 0, 'size': 0, 'color': 'rgba(0,0,0,0)'})
        )
        if legend_style == 'full':
            # dummy series to use as a stand-in for a second legend title
            figure.add_trace(
                go.Scatter(x=[0], y=[0],
                           name=f"({', '.join(labels)})" if len(labels) else 'Spectrum #',
                           mode='markers',
                           marker={'opacity': 0, 'size': 0, 'color': 'rgba(0,0,0,0)'})
            )
    for query, color in zip(queries, colors):
        y_values = self._numeric_df.loc[self._label_df.query(query).index]
        for i, row in y_values.iterrows():
            text = '<br>'.join(f'{label}=={self._label_df.loc[i][label]}'
                               for label in self._label_df.columns)
            if len(labels):
                name = f"({', '.join(f'{self._label_df.loc[i][label]}' for label in labels)})"
            else:
                name = f'({i})'
            if legend_style == 'groups':
                figure.add_trace(
                    go.Scatter(x=x, y=row, text=text,
                               name=','.join(re.findall(r'["](\w+)["]', query)),  # pretty kludgy
                               mode='lines', marker={'color': color, 'size': 1},
                               legendgroup=query, showlegend=False)
                )
            else:
                figure.add_trace(
                    go.Scatter(x=x, y=row, text=text, name=name, mode='lines',
                               marker={'color': color, 'size': 2},
                               showlegend=(legend_style == 'full'))
                )
    return figure
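# A standalone sketch (illustrative trace names; assumes only that plotly is
# installed) of the legend trick used in get_plot: fully transparent marker
# traces render as text-only rows in the legend, so they can serve as headings
# and spacers between groups of real entries.
import plotly.graph_objects as go

def _legend_heading_sketch():
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=[0], y=[0], name='Treatment',  # pseudo-heading row
                             mode='markers',
                             marker={'opacity': 0, 'size': 0, 'color': 'rgba(0,0,0,0)'}))
    fig.add_trace(go.Scatter(x=[1, 2], y=[1, 4], name='control', mode='lines',
                             legendgroup='control'))
    fig.add_trace(go.Scatter(x=[1, 2], y=[2, 3], name='treated', mode='lines',
                             legendgroup='treated'))
    return fig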
def get_loading_significance_table(self, group_key, theme=None, wrap=True):
    if self.results_file_ready:
        theme, style_header, style_cell = self._get_table_styles(theme)
        with h5py.File(self.results_filename, 'r') as file:
            description = file[group_key].attrs['description']
            feature_labels = np.array(file[group_key]['feature_labels'])
            loadings = np.array(file[group_key]['pls']['x_loadings']).ravel()
            p_values = np.array(file[group_key]['feature_p_values'])
            alpha = file[group_key].attrs['outer_alpha']
            base_collection_id = file.attrs.get('input_collection_id')
        x_min = x_max = None
        if base_collection_id is not None:
            try:
                base_collection = get_collection(current_user, int(base_collection_id))
                x = base_collection.get_dataset('/x').ravel()
                x_min = base_collection.get_dataset('/x_min').ravel()
                x_max = base_collection.get_dataset('/x_max').ravel()
                # keep only the bin boundaries that match the reported features
                x_min = x_min[np.in1d(x, feature_labels)]
                x_max = x_max[np.in1d(x, feature_labels)]
            except Exception:
                x_min = x_max = None
        valid_bin_boundaries = (x_min is not None and x_max is not None
                                and x_max.shape[0] == x_min.shape[0] == feature_labels.shape[0])
        is_significant = p_values < alpha
        df = pd.DataFrame()
        df['Bin'] = feature_labels
        df['Loading'] = loadings
        if valid_bin_boundaries:
            df['Bin Max'] = x_max
            df['Bin Min'] = x_min
        df['p Value'] = p_values
        df['Significant'] = ['*' if s else '' for s in is_significant]
        df = df.sort_values(['Significant', 'Bin'], ascending=[False, True])
        # format table for better display in browser
        df['p Value'] = df['p Value'].round(7).apply(lambda val: f'{val:.7f}')
        df['Bin'] = df['Bin'].round(4).apply(lambda val: f'{val:.4f}')
        df['Loading'] = df['Loading'].round(5).apply(lambda val: f'{val:.5f}')
        if valid_bin_boundaries:
            df['Bin Max'] = df['Bin Max'].round(4).apply(lambda val: f'{val:.4f}')
            df['Bin Min'] = df['Bin Min'].round(4).apply(lambda val: f'{val:.4f}')
        del df['Significant']
        df['Index'] = [str(i) for i in df.index]
        if valid_bin_boundaries:
            df = df[['Index', 'Bin Max', 'Bin', 'Bin Min', 'Loading', 'p Value']]
        else:
            df = df[['Index', 'Bin', 'Loading', 'p Value']]
        style_data_conditional = [
            {
                'if': {'filter_query': f'{{p Value}} < {alpha}'},
                'backgroundColor': '#D2F9F1'
            },
            {
                'if': {'filter_query': f'{{p Value}} > {alpha}'},
                'backgroundColor': '#F9D9D2'
            }
        ]
        table = dash_table.DataTable(
            id='feature-table',
            columns=[{'name': i, 'id': i} for i in df.columns],
            data=df.to_dict('records'),
            style_table={'height': '500px', 'overflowY': 'scroll'},
            fixed_rows={'headers': True, 'data': 0},
            is_focused=True,
            style_data_conditional=style_data_conditional,
            style_header=style_header,
            style_cell=style_cell,
            style_cell_conditional=[
                {'if': {'column_id': 'Index'},
                 'width': f'{max(df.Index.str.len().max(), 5) + 2}ch'},
                {'if': {'column_id': 'Bin'},
                 'width': f'{df.Bin.str.len().max() + 2}ch'},
                {'if': {'column_id': 'Bin Max'},
                 'width': f'{df.Bin.str.len().max() + 2}ch'},
                {'if': {'column_id': 'Bin Min'},
                 'width': f'{df.Bin.str.len().max() + 2}ch'},
                {'if': {'column_id': 'p Value'},
                 'width': '12ch'}
            ]
        )
        if wrap:
            return html.Div([dbc.Row(html.H4(description)), dbc.Row(table)])
        return table
    return [dash_table.DataTable(id='feature-table')]
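# A minimal, self-contained sketch (illustrative data; assumes the dash 2.x
# import style) of the conditional styling used above: each row is colored by
# comparing its 'p Value' column against the significance threshold via a
# filter_query, green below alpha and red above it.
from dash import dash_table

def _significance_table_sketch():
    alpha = 0.05
    rows = [{'Bin': '3.4210', 'p Value': 0.0103}, {'Bin': '1.2870', 'p Value': 0.2219}]
    return dash_table.DataTable(
        id='example-table',
        columns=[{'name': c, 'id': c} for c in ('Bin', 'p Value')],
        data=rows,
        style_data_conditional=[
            {'if': {'filter_query': f'{{p Value}} < {alpha}'}, 'backgroundColor': '#D2F9F1'},
            {'if': {'filter_query': f'{{p Value}} > {alpha}'}, 'backgroundColor': '#F9D9D2'}
        ]
    )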