def parse_contents_result(contents, filename):
    """Decode a Dash upload payload and load it as a ResultProcessing object.

    Parameters
    ----------
    contents : str
        Dash upload content of the form "<content_type>,<base64 data>".
    filename : str
        Name of the uploaded file; only names containing 'pkl' are handled.

    Returns
    -------
    ResultProcessing or None
        The loaded result object, or None when the file is not a pickle.

    Raises
    ------
    ValueError
        If the pickle payload cannot be loaded.
    """
    content_type, content_string = contents.split(',')
    decoded = base64.b64decode(content_string)
    try:
        if 'pkl' in filename:
            # NOTE(review): unpickling user-uploaded data is unsafe for
            # untrusted input — acceptable only for a local/trusted tool.
            result = ResultProcessing()
            result.load_models_directly(io.BytesIO(decoded))
            return result
    except Exception as e:
        # Chain the original exception so the root cause is not lost
        # (previously it was only print()ed and the traceback discarded).
        raise ValueError('There was an error processing this file.') from e
    # Unsupported file type: keep the historical silent-None behavior,
    # but make it explicit for readers.
    return None
def update_feature_pairwise_co_occurrence_graph(pro_len, result_data, ori_df):
    """Build the heatmap figure of pairwise feature co-occurrence counts.

    Returns a Plotly figure dict; when ``pro_len`` selects single-feature
    programs only, returns a title-only layout (no heatmap data).
    """
    names = ResultProcessing.read_dataset_names(ori_df)
    # result_data = jsonpickle.decode(result_data)
    result_data.calculate_featureList_and_calcvariableList()
    if pro_len != 'All' and not pro_len > 1:
        # Programs of length <= 1 cannot produce feature pairs.
        return {
            'layout': {
                'title': '<b>Feature Pairwise Co-occurrence</b> ',
            }
        }
    cooc_matrix, feature_index = result_data.get_feature_co_occurences_matrix(
        pro_len)
    # Per-cell hover strings: feature names on both axes plus the raw count.
    hover_text = [
        [
            'X: {}<br />Y: {}<br />Count: {}'.format(
                names[int(xx)], names[int(yy)], cooc_matrix[xi, yi])
            for xi, xx in enumerate(feature_index)
        ]
        for yi, yy in enumerate(feature_index)
    ]
    axis_labels = ['f' + str(i) for i in feature_index]
    return {
        'data': [{
            'z': cooc_matrix,
            'x': axis_labels,
            'y': axis_labels,
            'type': 'heatmap',
            'colorscale': 'Viridis',
            'hoverinfo': 'text',
            'text': hover_text,
        }],
        'layout': {
            'title': '<b>Feature Pairwise Co-occurrence</b> ',
        },
    }
def update_feature_occurrence_graph(pro_len, result_data, ori_df):
    """Build the bar chart of feature occurrence counts.

    Returns a tuple of (figure dict, total model count, current feature count)
    — the three outputs of the Dash callback.
    """
    names = ResultProcessing.read_dataset_names(ori_df)
    result_data.calculate_featureList_and_calcvariableList()
    feature_ids, occurrence_counts, cur_feature_num = \
        result_data.get_occurrence_from_feature_list_given_length(pro_len)
    hover_labels = [names[i] for i in feature_ids]
    bar_x = ['f' + str(i) for i in feature_ids]
    figure = {
        'data': [{
            'x': bar_x,
            'y': occurrence_counts,
            'type': 'bar',
            'hoverinfo': 'text',
            'text': hover_labels,
        }],
        'layout': {
            'title': '<b>Feature Occurrence</b>',
            'xaxis': {'title': 'feature index'},
            'yaxis': {'title': 'occurrence'},
        },
    }
    return figure, len(result_data.model_list), cur_feature_num
def update_co_occurrence_bar(specific_f_index, result_data, ori_df):
    """Build a bar chart of the features co-occurring with a selected feature."""
    names = ResultProcessing.read_dataset_names(ori_df)
    times, partner_idx = result_data.get_cooccurrence_info_given_feature(
        specific_f_index)
    if times is None:
        # No co-occurrence recorded for this feature: title-only figure.
        return {
            'layout': {
                'title': '<b>Co-occurring Features</b> ',
            }
        }
    # Sort the co-occurring neighbors by count (ascending).
    order = np.argsort(times)
    times = times[order]
    partner_idx = partner_idx[order]
    hover_labels = [names[i] for i in partner_idx]
    bar_x = ['f' + str(i) for i in partner_idx]
    return {
        'data': [{
            'x': bar_x,
            'y': times,
            'type': 'bar',
            'hoverinfo': 'text',
            'text': hover_labels,
        }],
        'layout': {
            'title': '<b>Co-occurring Features</b>',
            'xaxis': {'title': 'feature index'},
            'yaxis': {'title': 'co-occurrence'},
        },
    }
def update_main_page(n_clicks, ori_df):
    """Render the main visualization layout once the user has clicked.

    Raises
    ------
    PreventUpdate
        When ``n_clicks == 0`` (initial callback fire), so Dash leaves the
        page untouched.
    """
    if n_clicks == 0:
        # Guard first: avoid reading the dataset when nothing will update.
        raise PreventUpdate
    names = ResultProcessing.read_dataset_names(ori_df)
    # (index, name) pairs feed the dropdown options of the layout.
    available_indicators = list(zip(range(len(names)), names))
    return render_main_visualization_layout(available_indicators)
def update_two_feature_scatter_plot_using_filters(xaxis_column_index,
                                                 yaxis_column_index,
                                                 co_click_data, ori_df):
    """Build a two-feature scatter plot, one trace per class label.

    The axis features come either from the two dropdowns or from a click on
    the co-occurrence heatmap, depending on which input triggered the
    callback.
    """
    X, y = ResultProcessing.read_dataset_X_y(ori_df)
    names = ResultProcessing.read_dataset_names(ori_df)
    ctx = dash.callback_context
    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]
    if trigger_id in ('crossfilter-xaxis-column', 'crossfilter-yaxis-column'):
        xaxis_column_index = int(xaxis_column_index)
        yaxis_column_index = int(yaxis_column_index)
    elif trigger_id == 'co-occurrence-graph':
        # Heatmap cell labels look like 'f<index>'; strip the 'f' prefix.
        xaxis_column_index = int(co_click_data['points'][0]['x'][1:])
        yaxis_column_index = int(co_click_data['points'][0]['y'][1:])
    unique_label = ori_df['category'].unique()
    return {
        'data': [
            # One trace per class label (renamed from `type`, which
            # shadowed the builtin).
            dict(x=X[:, int(xaxis_column_index)][y == label],
                 y=X[:, int(yaxis_column_index)][y == label],
                 mode='markers',
                 marker={
                     'size': 15,
                     'opacity': 0.5,
                     'line': {
                         'width': 0.5,
                         'color': 'white'
                     },
                 },
                 name=label) for label in unique_label
        ],
        'layout': dict(xaxis={
            'title': names[int(xaxis_column_index)],
            'type': 'linear'
        },
                       yaxis={
                           'title': names[int(yaxis_column_index)],
                           'type': 'linear'
                       },
                       hovermode='closest',
                       clickmode='event+select',
                       title='<b>Two-Feature Scatter Plot</b>')
    }
def specific_feature_occurrence(specific_f_index, result_data, ori_df):
    """Report in how many models the selected feature occurs.

    Returns a human-readable sentence for the Dash UI.
    """
    names = ResultProcessing.read_dataset_names(ori_df)
    result_data.calculate_featureList_and_calcvariableList()
    features, num_of_occurrences, _ = \
        result_data.get_occurrence_from_feature_list_given_length('All')
    occurrence_dic = dict(zip(features, num_of_occurrences))
    # Normalize once: the callback may deliver the index as a string
    # (names[int(...)] below shows a conversion is needed), while the
    # occurrence table is keyed by integer feature indices. The previous
    # membership test used the raw value and could silently miss.
    feature_idx = int(specific_f_index)
    specific_f = names[feature_idx]
    if feature_idx in occurrence_dic:
        occurrence_f = occurrence_dic[feature_idx]
        return str(specific_f) + " appears in " + str(
            occurrence_f) + " models."
    else:
        return "This feature has zero occurrence"
def update_model_accuracy_graph(click_data, prog_len, result_data, ori_df):
    """Plot testing accuracy of every model containing the clicked feature."""
    names = ResultProcessing.read_dataset_names(ori_df)
    if click_data is None:
        # Nothing clicked yet: title-only figure.
        return {'layout': {'title': '<b>Model Accuracy</b>'}}
    result_data.calculate_featureList_and_calcvariableList()
    # Clicked bar labels look like 'f<index>'; strip the 'f' prefix.
    feature_num = int(click_data['points'][0]['x'][1:])
    model_indices = result_data.get_index_of_models_given_feature_and_length(
        feature_num, prog_len)
    accuracies = [
        result_data.model_list[i].testingAccuracy for i in model_indices
    ]
    x_labels = ['m' + str(i) for i in model_indices]
    return {
        'data': [
            {
                'x': x_labels,
                'y': accuracies,
                'mode': 'markers',
                'marker': {'size': 3},
            },
        ],
        'layout': {
            'title': '<b>Model Accuracy</b>' + '<br>' +
                     'Models containing feature ' + str(names[feature_num]),
            'xaxis': {'title': 'model index'},
            'yaxis': {'title': 'accuracy'},
            'clickmode': 'event+select',
        },
    }
import pandas as pd
import numpy as np
from data_processing_utils._processing_funcs import ResultProcessing

# Ad-hoc smoke-test script: loads a trained LGP result pickle and its matching
# sample dataset, then exercises a few ResultProcessing helpers.
# NOTE(review): runs file I/O at import time — intended to be executed
# directly, not imported.
result = ResultProcessing()
result.load_models_from_file_path("../dataset/lgp_random_AD_vs_Normal.pkl")
data = pd.read_csv('../assets/sample_data/sample_alzheimer_vs_normal.csv')
X, y = ResultProcessing.read_dataset_X_y(data)
names = ResultProcessing.read_dataset_names(data)
result.calculate_featureList_and_calcvariableList()
# test get network function
# df, node_size_dic = result.get_network_data(names, 0.03, 'dUMP')
# print(node_size_dic)
# print(df.values)
# # aaa = df.loc[(df['f1'] == 'dUMP') | (df['f2'] == 'dUMP')]
# # others = np.unique(aaa[['f1', 'f2']].values)
# others = others[others != 'dUMP']
# aaa2 = df.loc[(df['f1'].isin(others)) & (df['f2'].isin(others)) ]
# aaa = aaa.append(aaa2, ignore_index=True)
# print("dd")
# end get network function
# for index, row in df.iterrows():
# print(df['source'][index])
# prog_index, acc_scores = result.get_accuracy_given_length(1)
# index = result.get_index_of_models_given_feature_and_length(105, 3)
# print(index)
def create_sub_network(result_data, ori_df, top_percentage,
                       specific_feature_index):
    """Render a Cytoscape sub-network centered on one selected feature.

    Parameters
    ----------
    result_data : ResultProcessing
        Loaded result object providing ``get_network_data``.
    ori_df : pandas.DataFrame
        Original dataset; used only to resolve feature names.
    top_percentage : float
        Percentage (0-100) of top edges to keep; converted to a fraction.
    specific_feature_index : int or str
        Index of the feature to center the network on.

    Returns
    -------
    dash_html_components.Div
        Either the network graph or a "no data" message.
    """
    top_percentage = top_percentage * 0.01  # UI slider gives percent
    names = ResultProcessing.read_dataset_names(ori_df)
    specific_feature = names[int(
        specific_feature_index)]  # convert index to name
    df, node_size_dic = result_data.get_network_data(names, top_percentage,
                                                     specific_feature)
    # Error catching: nothing to draw in the given selection.
    if df.empty:
        return html.Div(
            html.Div(
                dcc.Markdown('''
                ##### No network graph in given selection
                '''),
                className='pretty_container eleven columns',
            ),
            className='container-display',
        )
    # Random initial positions; the 'cola' layout repositions nodes anyway.
    nodes = [{
        'data': {
            'id': node,
            'label': node,
            'size': node_size_dic[node]
        },
        'position': {
            'x': np.random.randint(0, 100),
            'y': np.random.randint(0, 100)
        },
    } for node in np.unique(df[['f1', 'f2']].values)]
    # Use the row object yielded by iterrows() directly instead of
    # re-indexing the whole frame per cell (df['f1'][index]).
    edges = [{
        'data': {
            'source': row['f1'],
            'target': row['f2'],
            'weight': row['weight']
        }
    } for _, row in df.iterrows()]
    elements = nodes + edges
    return html.Div(
        html.Div(
            [
                cyto.Cytoscape(
                    id='cytoscape-layout-2',
                    elements=elements,
                    responsive=True,
                    style={
                        'width': '100%',
                        'height': '500px'
                    },
                    layout={
                        'name': 'cola',
                        'nodeRepulsion': 40000,
                        'nodeSpacing': 35,
                    },
                    zoomingEnabled=False,
                    stylesheet=[
                        {
                            'selector': 'node',
                            'style': {
                                # Scale node size from occurrence count.
                                "width": "mapData(size, 0, 100, 20, 60)",
                                "height": "mapData(size, 0, 100, 20, 60)",
                                "content": "data(label)",
                                "font-size": "12px",
                                "text-valign": "center",
                                "text-halign": "center",
                            }
                        },
                        {
                            'selector': 'edge',
                            'style': {
                                "opacity": "0.5",
                                # Scale edge width from co-occurrence weight.
                                "width": "mapData(weight, 0, 20, 1, 8)",
                                "overlay-padding": "3px",
                                "content": "data(weight)",
                                "font-size": "10px",
                                "text-valign": "center",
                                "text-halign": "center",
                            }
                        },
                    ],
                )  # end cytoscape
            ],
            className='pretty_container eleven columns',
        ),
        className='container-display',
    )
def update_detailed_model_info(clickData, result_data, ori_df):
    """Show the string representation of the model the user clicked.

    Returns None (no update) when nothing has been clicked yet.
    """
    names = ResultProcessing.read_dataset_names(ori_df)
    if clickData is None:
        return None
    # Clicked point labels look like 'm<index>'; strip the 'm' prefix.
    model_idx = int(clickData['points'][0]['x'][1:])
    return result_data.convert_program_str_repr(
        result_data.model_list[model_idx], names)