def plot_model():
    """Build a parallel-categories figure of the predicted crime data.

    Calls ``predict_crime()``, stores the result in the module-level
    ``model_data`` and adds an integer ``TYPE_NO`` column to that frame; the
    column is used as the line colour of the plot.

    Returns:
        A ``go.Figure`` with a single ``go.Parcats`` trace whose dimensions
        are NEIGHBOURHOOD, DAY_TYPE and TYPE.
    """
    global model_data

    # Getting the predicted data frame from the predict_crime function.
    model_data = predict_crime()
    # NOTE: this is an alias, not a copy, so TYPE_NO is also added to the
    # global ``model_data`` frame (kept exactly as the original did).
    df_final_plot = model_data

    # Numeric labels per crime type, used only to colour the lines.
    type_codes = {
        'MISCHIEF': '0',
        'BREAK And ENTER': '1',
        'THEFT': '2',
        'VEHICLE COLLISION': '3',
    }
    # A single assignment replaces the former chain of
    # ``df[col].replace(..., inplace=True)`` calls: in-place methods on a
    # column selection are not guaranteed to write back into the frame.
    # ``astype`` still fails if TYPE holds a value that is neither listed
    # above nor integer-like.
    df_final_plot['TYPE_NO'] = (
        df_final_plot['TYPE'].replace(type_codes).astype('int32')
    )

    neighbourhood_dim = go.parcats.Dimension(
        values=df_final_plot.NEIGHBOURHOOD, label="NEIGHBOURHOOD")
    day_type_dim = go.parcats.Dimension(
        values=df_final_plot.DAY_TYPE, label="DAY_TYPE")
    crime_type_dim = go.parcats.Dimension(
        values=df_final_plot.TYPE, label="CRIME TYPE")
    color = df_final_plot.TYPE_NO

    # Creating the parallel-categories diagram with plotly graph objects.
    fig = go.Figure(data=[
        go.Parcats(
            dimensions=[neighbourhood_dim, day_type_dim, crime_type_dim],
            line={'color': color, 'colorscale': 'rdbu'},
            labelfont={'size': 12, 'family': 'Times'},
            tickfont={'size': 10, 'family': 'Times'},
            arrangement='freeform',
        )
    ])
    return fig
def PCVisualize(filenum, columns, color):
    """Render a parallel-categories plot for the selected dataset columns.

    Args:
        filenum: Dataset selector; only ``'1'`` (the module-level
            ``df_dataset``) is handled.
        columns: Comma-separated keys into ``data_cols``; each key is
            translated to a column name of the dataset.
        color: Key into ``data_cols`` naming the column that colours the
            lines.

    Returns:
        The rendered ``paraplot.html`` template carrying the plot JSON, or
        the rendered ``500.html`` template with an error message when the
        plot cannot be built.
    """
    global data_cols, df_dataset

    filepath = session.get('filepath', None)
    set_vars(filepath)
    set_mappings()

    if filenum != '1':
        # Previously no data frame was bound for any other value and the
        # function died with an UnboundLocalError; report it the same way
        # as every other failure of this view.
        return render_template(
            "500.html", error="Unsupported file number: " + str(filenum))

    try:
        datadf = df_dataset
        # The key lookups live inside the try block so that an unknown
        # column key is reported through the 500 page as well.
        column_names = [data_cols[key] for key in columns.split(',')]
        color_values = datadf[data_cols[color]]
        dimensions = [
            dict(label=str(name), values=datadf[name])
            for name in column_names
        ]
        data = [
            go.Parcats(
                line=dict(
                    color=color_values,
                    colorscale='viridis',
                    showscale=True,
                    cmid=color_values.median(),
                ),
                dimensions=dimensions,
            )
        ]
        para = json.dumps(data, cls=plotly.utils.PlotlyJSONEncoder)
    except Exception as e:
        # View boundary: surface any failure to the user via the error page.
        return render_template("500.html", error=str(e))
    return render_template("paraplot.html", plot=para)
def update_figure():
    """Build the parallel-categories figure from the enclosing-scope inputs.

    Reads ``Time_dim``, ``Terminal_dim``, ``Passengers_dim``, ``color`` and
    ``colorscale`` from the surrounding scope; none of them are parameters.

    Returns:
        The ``go.Figure`` holding a single ``go.Parcats`` trace.
    """
    fig = go.Figure(data=[
        go.Parcats(
            dimensions=[Time_dim, Terminal_dim, Passengers_dim],
            line={'color': color, 'colorscale': colorscale},
            hoveron='color',
            hoverinfo='count+probability',
            labelfont={'size': 18, 'family': 'Times'},
            tickfont={'size': 16, 'family': 'Times'},
            arrangement='freeform',
        )
    ])
    # Return the figure itself. The previous ``{fig}`` was a set literal,
    # which is unordered and not JSON-serialisable, so nothing could consume
    # it as a figure.
    return fig
def _parcats_graph(frame):
    """Wrap *frame* in a parallel-categories ``dcc.Graph`` coloured by its ``target`` column."""
    trace = go.Parcats(
        dimensions=[
            {"label": column, "values": list(frame[column].values)}
            for column in frame.columns
        ],
        # The colour codes are derived from the frame being plotted so that
        # every colour stays attached to its own (already sorted) row.
        line={
            "color": pd.Categorical(frame["target"]).codes,
            "colorscale": "Portland",
        },
        hoveron="color",
        hoverinfo="count+probability",
        arrangement="freeform",
    )
    layout = go.Layout(autosize=False, height=800)
    return dcc.Graph(figure=go.Figure(data=[trace], layout=layout))


def feature_interactions(radio, url, feat_importance, rows):
    """Plot interactions between the most important features of a dataset.

    Args:
        radio: Which feature set to show: ``"top"``, ``"numeric"`` or
            ``"nominal"``. Any other value leaves the graph undefined and
            raises ``UnboundLocalError`` (unchanged from the original).
        url: String containing ``data/<digits>``; the digits select the
            cached ``cache/df<id>.pkl`` and ``cache/fi<id>.pkl`` files.
        feat_importance: Nothing is plotted unless this equals ``"done"``.
        rows: Records convertible to a DataFrame with the columns
            ``Target``, ``Attribute`` and ``DataType``.

    Returns:
        ``[]`` when ``feat_importance`` is not ``"done"``; the tuple
        ``("No target found", "No target found")`` when no row is marked as
        target; otherwise an ``html.Div`` wrapping the graph (or a short
        "none found" paragraph).

    Raises:
        AttributeError: If ``url`` does not contain ``data/<digits>``.
    """
    data_id = int(re.search(r"data/(\d+)", url).group(1))
    if feat_importance != "done":
        return []
    df = pd.read_pickle("cache/df" + str(data_id) + ".pkl")
    fi = pd.read_pickle("cache/fi" + str(data_id) + ".pkl")

    # Get meta data
    meta_data = pd.DataFrame(rows)
    try:
        target_rows = meta_data[meta_data["Target"] == "true"]
        target_attribute = target_rows["Attribute"].values[0]
        target_type = target_rows["DataType"].values[0]
    except IndexError:
        return "No target found", "No target found"

    if target_type in ("nominal", "string"):
        y = pd.Categorical(df[target_attribute]).codes
    else:
        y = df[target_attribute]

    # Feature interaction plots
    df = clean_dataset(df)

    # Extract top nominal, top numeric features
    numerical_features = list(
        meta_data["Attribute"][meta_data["DataType"] == "numeric"]
    )
    nominal_features = list(
        meta_data["Attribute"][meta_data["DataType"] == "nominal"]
    )
    top_numericals = fi["index"][fi["index"].isin(numerical_features)][:4]
    top_nominals = fi["index"][fi["index"].isin(nominal_features)][:4]
    df["target"] = df[target_attribute]

    if target_type == "numeric":
        # Bin numeric target. ``bin_numeric`` is defined elsewhere;
        # presumably it bins "target_var" into "target" and adds a helper
        # "bin" column -- confirm against its definition.
        df["target_var"] = y
        df = bin_numeric(df, "target_var", "target")
        # Both helper columns are dropped now; the old non-inplace
        # ``df.drop("target_var", axis=1)`` threw its result away.
        df = df.drop(columns=["bin", "target_var"])
    else:
        try:
            df["target"] = df["target"].astype(int)
        except ValueError:
            logger.warning("target not converted to int")
        df.sort_values(by="target", inplace=True)
        df["target"] = df["target"].astype(str)

    if radio == "top":
        # ``assign`` builds a new frame instead of writing into a slice of df.
        top_features = df[fi["index"][0:4].values].assign(target=df["target"])
        if len(top_numericals):
            px_mat = px.scatter_matrix(top_features, color="target", height=800)
            px_mat.update_traces(diagonal_visible=False)
            graph = dcc.Graph(figure=px_mat)
        else:
            # Previously coloured with ``y``, which was computed before the
            # frame was cleaned and sorted and so no longer lined up with
            # the plotted rows.
            graph = _parcats_graph(top_features)
    elif radio == "numeric":
        # Top numeric features
        if len(top_numericals):
            df_num = df[top_numericals].assign(target=df["target"])
            px_mat = px.scatter_matrix(df_num, color="target", height=800)
            px_mat.update_traces(diagonal_visible=False)
            graph = dcc.Graph(figure=px_mat)
        else:
            graph = html.P("No numericals found")
    elif radio == "nominal":
        if len(top_nominals):
            df_nom = df[top_nominals].assign(target=df["target"])
            graph = _parcats_graph(df_nom)
        else:
            graph = html.P("No nominals found")
    return html.Div(graph, className="twelve columns")
def _sized_parcats_graph(frame, width):
    """Wrap *frame* in a parallel-categories ``dcc.Graph`` of the given width, coloured by ``target``."""
    trace = go.Parcats(
        dimensions=[
            {'label': column, 'values': list(frame[column].values)}
            for column in frame.columns
        ],
        # The colour codes are derived from the frame being plotted so that
        # every colour stays attached to its own (already sorted) row.
        line={
            'color': pd.Categorical(frame['target']).codes,
            'colorscale': 'Portland',
        },
        hoveron='color',
        hoverinfo='count+probability',
        arrangement='freeform',
    )
    layout = go.Layout(autosize=False, width=width, height=800)
    return dcc.Graph(figure=go.Figure(data=[trace], layout=layout))


def feature_interactions(rows, radio, url, dummy):
    """Plot interactions between the most important features of a dataset.

    Args:
        rows: Records convertible to a DataFrame with the columns
            ``Target``, ``Attribute`` and ``DataType``.
        radio: Which feature set to show: ``"top"``, ``"numeric"`` or
            ``"nominal"``. Any other value leaves the graph undefined and
            raises ``UnboundLocalError`` (unchanged from the original).
        url: String containing ``data/<digits>``; the digits select the
            cached ``cache/df<id>.pkl`` and ``cache/fi<id>.pkl`` files.
        dummy: Nothing is plotted unless this equals ``"done"``.

    Returns:
        ``[]`` when ``dummy`` is not ``"done"``; the tuple
        ``("No target found", "No target found")`` when no row is marked as
        target; otherwise an ``html.Div`` wrapping the graph (or a short
        "none found" paragraph).

    Raises:
        AttributeError: If ``url`` does not contain ``data/<digits>``.
    """
    # Raw strings: ``\d`` and ``\(`` are not valid escapes in plain literals.
    data_id = int(re.search(r'data/(\d+)', url).group(1))
    if dummy != "done":
        return []
    df = pd.read_pickle('cache/df' + str(data_id) + '.pkl')
    fi = pd.read_pickle('cache/fi' + str(data_id) + '.pkl')

    meta_data = pd.DataFrame(rows)
    try:
        target_rows = meta_data[meta_data["Target"] == "true"]
        target_attribute = target_rows["Attribute"].values[0]
        target_type = target_rows["DataType"].values[0]
    except IndexError:
        return "No target found", "No target found"

    if target_type in ("nominal", "string"):
        y = pd.Categorical(df[target_attribute]).codes
    else:
        y = df[target_attribute]

    # Feature interaction plots
    df = clean_dataset(df)
    numerical_features = list(
        meta_data["Attribute"][meta_data["DataType"] == "numeric"])
    nominal_features = list(
        meta_data["Attribute"][meta_data["DataType"] == "nominal"])
    top_numericals = fi['index'][fi['index'].isin(numerical_features)][:5]
    top_nominals = fi['index'][fi['index'].isin(nominal_features)][:5]
    df['target'] = df[target_attribute]

    if target_type == "numeric":
        cmap_type = 'seq'
        # Bin the numeric target into 1000 intervals and order the rows by
        # the left edge parsed from each interval label.
        df['target'] = y
        df['target'] = pd.cut(df['target'], 1000).astype(str)
        df['bin'] = (
            df['target'].str.extract(r'\((.*),', expand=False).astype(float)
        )
        df.sort_values(by='bin', inplace=True)
        df.drop('bin', axis=1, inplace=True)
    else:
        cmap_type = 'cat'
        try:
            df['target'] = df['target'].astype(int)
        except ValueError:
            print("target not converted to int")
        df.sort_values(by='target', inplace=True)
        df['target'] = df['target'].astype(str)

    if radio == "top":
        # ``assign`` builds a new frame instead of writing into a slice of df.
        top_features = df[fi['index'][0:5].values].assign(target=df['target'])
        if len(top_numericals):
            matrix = ff.create_scatterplotmatrix(
                top_features,
                title='Top feature interactions',
                diag='box',
                index='target',
                colormap_type=cmap_type,
                height=800,
                width=900)
            graph = dcc.Graph(figure=matrix)
        else:
            # Previously coloured with ``y``, which was computed before the
            # frame was cleaned and sorted and so no longer lined up with
            # the plotted rows.
            graph = _sized_parcats_graph(top_features, width=1200)
    elif radio == "numeric":
        if len(top_numericals):
            df_num = df[top_numericals].assign(target=df['target'])
            matrix = ff.create_scatterplotmatrix(
                df_num,
                title='Top numeric feature interactions',
                diag='box',
                index='target',
                colormap_type=cmap_type,
                height=1000,
                width=1000)
            graph = dcc.Graph(figure=matrix)
        else:
            graph = html.P("No numericals found")
    elif radio == "nominal":
        if len(top_nominals):
            df_nom = df[top_nominals].assign(target=df['target'])
            graph = _sized_parcats_graph(df_nom, width=1000)
        else:
            graph = html.P("No nominals found")
    return html.Div(graph)
def g_aluvial_cat(param_data, param_theme, param_dims):
    """
    Build the alluvial (parallel categories) chart for the category table.

    Parameters
    ----------
    param_data : pd.DataFrame : table to plot (table 3); must contain the
        columns 'categoria', 'pais', 'frecuencia', 'tipo_1', 'tipo_2' and
        'tipo_3'. A 'color' column is added to (or overwritten in) this
        frame as a side effect.
    param_theme : dict : visualization theme; the keys 'color_linea_1' to
        'color_linea_9' and 'color_texto_ejes' are read.
    param_dims : dict : visualization sizes; 'width' and 'height' are read
        from param_dims['figura_3'].

    Returns
    -------
    fig_g_aluvial_cat : plotly : plotly figure object to be rendered

    Debugging
    ---------
    param_data = tabla_3
    param_theme = tema_base
    param_dims = dimensiones_base
    """

    # dimensions: category, country and frequency of occurrence
    categoria_dim = go.parcats.Dimension(
        values=param_data['categoria'], label='categoria')
    pais_dim = go.parcats.Dimension(
        values=param_data['pais'], label='pais')
    frecuencia_dim = go.parcats.Dimension(
        values=param_data['frecuencia'], label='frecuencia')

    # dimensions: presence of type 1, 2 and 3 patterns (0 = without, 1 = with)
    tipo_1_dim, tipo_2_dim, tipo_3_dim = (
        go.parcats.Dimension(
            values=param_data[columna], label=columna,
            categoryarray=[0, 1], ticktext=['sin patron', 'con patron'])
        for columna in ('tipo_1', 'tipo_2', 'tipo_3')
    )

    # theme colour assigned to each known category
    color_por_categoria = {
        'Tasas de interes': param_theme['color_linea_9'],
        'actividad economica': param_theme['color_linea_4'],
        'consumo': param_theme['color_linea_1'],
        'energia': param_theme['color_linea_7'],
        'flujos de capital': param_theme['color_linea_5'],
        'inflacion': param_theme['color_linea_6'],
        'mercado inmobiliario': param_theme['color_linea_2'],
        'mercado laboral': param_theme['color_linea_8'],
        'subasta de bonos': param_theme['color_linea_3'],
    }

    # create the colour column in the input data with one whole-column
    # assignment; the previous per-row ``param_data['color'].iloc[i] = ...``
    # was chained assignment, which pandas does not guarantee to write back.
    # Categories that are not listed above get the neutral grey.
    color = [color_por_categoria.get(categoria, '#ABABAB')
             for categoria in param_data['categoria']]
    param_data['color'] = color

    # create the figure object
    fig_g_aluvial_cat = go.Figure()

    # add the alluvial (parallel categories) trace
    texto_ejes = param_theme['color_texto_ejes']
    fig_g_aluvial_cat.add_trace(go.Parcats(
        dimensions=[categoria_dim, frecuencia_dim, pais_dim,
                    tipo_1_dim, tipo_2_dim, tipo_3_dim],
        line={'color': color},
        hoveron='color',
        hoverinfo='count+probability',
        labelfont={'size': 14, 'family': 'Times', 'color': texto_ejes},
        tickfont={'size': 14, 'family': 'Times', 'color': texto_ejes},
        arrangement='perpendicular'))

    # layout of margins, titles and axes
    fig_g_aluvial_cat.update_layout(
        margin=go.layout.Margin(l=100, r=25, b=5, t=25, pad=10),
        title=None)

    # size settings
    fig_g_aluvial_cat.layout.autosize = True
    fig_g_aluvial_cat.layout.width = param_dims['figura_3']['width']
    fig_g_aluvial_cat.layout.height = param_dims['figura_3']['height']

    return fig_g_aluvial_cat
dims = [] for _, dim_key in enumerate(dim_parallel): dims.append( go.parcats.Dimension(values=filtered_table[dim_key], label=dim_key)) if c_key != 'None': unique_list = np.sort(filtered_table[c_key].unique()) if np.issubdtype(unique_list.dtype, np.integer) or \ np.issubdtype(unique_list.dtype, np.floating): parallel_fig = go.Figure(data=[ go.Parcats(dimensions=dims, line={ 'color': filtered_table[c_key], 'colorbar': dict(title=c_key) }, hoveron='color', hoverinfo='count+probability', arrangement='freeform') ]) else: filtered_table['_C_'] = np.zeros_like( filtered_table[c_key]) for idx, var in enumerate(unique_list): filtered_table.loc[filtered_table[c_key] == var, '_C_'] = idx parallel_fig = go.Figure(data=[ go.Parcats(dimensions=dims, line={'color': filtered_table['_C_']}, hoverinfo='count+probability',