def make_sankey(self, theme):
    """Build the energy Sankey figure from the module-level ``data`` structure.

    Note: ``theme`` is accepted for interface compatibility but not used here.
    """
    node_spec = data['data'][0]['node']
    link_spec = data['data'][0]['link']
    sankey = go.Sankey(
        valueformat=".0f",
        valuesuffix="TWh",
        # Nodes: labels and colors come straight from the source payload.
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=node_spec['label'],
            color=node_spec['color'],
        ),
        # Links: flows between the nodes above.
        link=dict(
            source=link_spec['source'],
            target=link_spec['target'],
            value=link_spec['value'],
            label=link_spec['label'],
            color=link_spec['color'],
        ),
    )
    return go.Figure(data=[sankey])
def sankey_plot(corr_mat, parties_apr, parties_sep):
    """Display a Sankey diagram of vote movement between the two 2019 elections.

    Nodes 0..len(parties_apr)-1 are the April parties; the remaining nodes are
    the September parties. Link widths come from the flattened ``corr_mat``.
    """
    n_apr = len(parties_apr)
    n_sep = len(parties_sep)
    # Full cartesian pairing: every April party links to every September party.
    source, target = np.meshgrid(
        np.arange(0, n_apr),
        np.arange(n_apr, n_apr + n_sep))
    source = source.flatten()
    target = target.flatten()
    sankey = go.Sankey(
        node=dict(
            thickness=12,
            label=list(parties_apr) + list(parties_sep),
        ),
        link=dict(
            source=source,
            target=target,
            value=corr_mat.flatten(),
        ))
    fig = go.Figure(data=[sankey])
    fig.update_layout(
        title_text=
        "Votes Moving: September 2019 Elections ---> April 2019 Elections")
    fig.show()
def update_sankey_plot(data):
    """Rebuild the energy-forecast Sankey figure from a plotly-style data dict.

    ``data`` is expected to carry node/link specs under ``data['data'][0]``.
    """
    nodes = data['data'][0]['node']
    links = data['data'][0]['link']
    figure = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            valuesuffix="TWh",
            # Node styling plus labels/colors straight from the input payload.
            node=dict(
                pad=15,
                thickness=15,
                line=dict(color="black", width=0.5),
                label=nodes['label'],
                color=nodes['color'],
            ),
            # Flows between nodes (links carry no explicit colors here).
            link=dict(
                source=links['source'],
                target=links['target'],
                value=links['value'],
                label=links['label'],
            ),
        )
    ])
    figure.update_layout(
        title_text=
        "Energy forecast for 2050<br>Source: Department of Energy & Climate Change, Tom Counsell via <a href='https://bost.ocks.org/mike/sankey/'>Mike Bostock</a>",
        font_size=10)
    return figure
def update_Sankey(xaxis_column1_name):
    """Build a Sankey figure linking day-of-week, district, and the chosen column.

    Node labels are the concatenated unique values of the three categoricals;
    link data is delegated to ``getData``.
    """
    dff = df[xaxis_column1_name]
    week = list(df.DayOfWeek.unique())
    region = list(df.PdDistrict.unique())
    other_type = list(df[xaxis_column1_name].unique())
    size = week + region + other_type
    # Fixed palette; positions correspond to the concatenated label order.
    node_colors = [
        'rgb(50, 168, 160)', 'yellow', 'green', 'purple', 'white',
        'orange', 'rgb(131, 207, 56)', 'rgb(43, 196, 156)',
        'rgb(28, 92, 156)', 'rgb(11, 120, 230)', 'rgb(232, 39, 120)',
        'rgb(186, 103, 48)', 'rgb(23, 156, 76)', 'rgb(55, 21, 176)',
        'rgb(100, 156, 123)', 'rgb(31, 66, 110)', 'rgb(186, 150, 48)',
        'rgb(131, 207, 56)', 'rgb(232, 39, 120)'
    ]
    sankey = go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color='blue', width=0.8),
            label=size,
            color=node_colors),
        link=getData(size, dff, week, xaxis_column1_name))
    return go.Figure(data=[sankey])
def format_data(nodes):
    """Convert a list of game-tree nodes into a plotly Sankey trace.

    Each node contributes a label/color/customdata entry; non-root nodes also
    contribute a parent->child link weighted by visit count.

    Parameters
    ----------
    nodes : iterable
        Tree nodes exposing ``move``, ``color``, ``idx``, ``visits``,
        ``parent``, ``game_string()`` and ``score()``.

    Returns
    -------
    plotly.graph_objects.Sankey
    """
    label, color_node, source, target, value, color_link, customdata = [], [], [], [], [], [], []
    for node in nodes:
        label.append(node.move)
        color_node.append(color_codes[node.color])
        # customdata: [game string, score as a percentage] for the hover text.
        customdata.append([node.game_string(), node.score() * 100])
        # Root's children hang off a synthetic 'root' parent; only real
        # parent->child edges become links.
        if node.parent.move != 'root':
            source.append(node.parent.idx)
            target.append(node.idx)
            value.append(node.visits)
            color_link.append(score2color(node.score()))
    node = dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=0.5),
        label=label,
        color=color_node,
        customdata=customdata,
        # FIX: ":.f" is not a valid d3-format specifier (a precision digit is
        # required after the dot) and makes plotly.js raise at render time;
        # ":.0f" renders whole-number game counts as intended.
        hovertemplate=
        "%{customdata[0]}<br />You scored %{customdata[1]:.0f}% in %{value:.0f} games<extra></extra>"
    )
    link = dict(
        source=source,
        target=target,
        value=value,
        color=color_link,
        # FIX: same invalid ":.f" specifier replaced with ":.0f" here.
        hovertemplate=
        "%{target.label}<br />%{value:.0f} of %{source.value:.0f} games<extra></extra>"
    )
    data = go.Sankey(node=node, link=link)
    return data
def get_pooling_sankey_diagram(_df, _name, _unit=1000.0):
    """Write a Sankey diagram of pool/solo ride-hail request outcomes as PNG.

    The image is saved to ``<_name>/pooling-metrics-sankey.png``; ``_unit``
    scales raw request counts into the "K" figures shown in the node labels.
    """
    requests = _df["ride_hail_requests"]
    pool_tot_share = _df["multi_passengers_trips_per_ride_hail_trips"]
    pool_share = _df["multi_passengers_trips_per_pool_trips"]
    solo_share = (_df["solo_trips"] + _df["one_passenger_pool_trips"]) / requests
    unmatched_share = (
        _df["unmatched_pool_requests"] + _df["unmatched_solo_requests"]) / requests
    # Node order: 0=pool requests, 1=solo requests, 2=pool, 3=solo, 4=unmatched.
    node_labels = [
        "pool requests: {:.1f}K".format(_df["ride_hail_pool_requests"] / _unit),
        "solo requests: {:.1f}K".format(_df["ride_hail_solo_requests"] / _unit),
        "pool: {:.1%} ({:.1%})".format(pool_tot_share, pool_share),
        "solo: {:.1%}".format(solo_share),
        "unmatched: {:.1%}".format(unmatched_share)
    ]
    link_values = [
        _df["multi_passenger_pool_trips"],
        _df["one_passenger_pool_trips"],
        _df["unmatched_pool_requests"],
        _df["solo_trips"],
        _df["unmatched_solo_requests"]
    ]
    sankey = go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color="black", width=0.5),
            label=node_labels),
        link=dict(
            source=[0, 0, 0, 1, 1],
            target=[2, 3, 4, 3, 4],
            value=link_values))
    diagram = go.Figure(data=[sankey])
    diagram.update_layout(title_text="Sankey Diagram For Pooling", font_size=10)
    diagram.write_image("{}/pooling-metrics-sankey.png".format(_name))
def update_Sankey(selected_poke_attr, selectedData):
    """Build a Sankey figure over legendary flag, color and the chosen attribute.

    When ``selectedData`` is provided, only the rows behind the selected scatter
    points are used; otherwise the full dataframe is plotted.
    """
    if selectedData:
        picked = [point['pointIndex'] for point in selectedData['points']]
        dff = df.iloc[picked, :]
    else:
        dff = df
    is_legendary = list(dff.isLegendary.unique())
    color = list(dff.Color.unique())
    selected_attr = list(dff[selected_poke_attr].unique())
    size = is_legendary + color + selected_attr
    # Fixed palette; positions correspond to the concatenated label order.
    node_colors = [
        'rgb(50, 168, 160)', 'yellow', 'green', 'purple', 'white',
        'orange', 'rgb(131, 207, 56)', 'rgb(43, 196, 156)',
        'rgb(28, 92, 156)', 'rgb(11, 120, 230)', 'rgb(232, 39, 120)',
        'rgb(186, 103, 48)', 'rgb(23, 156, 76)', 'rgb(55, 21, 176)',
        'rgb(100, 156, 123)', 'rgb(31, 66, 110)', 'rgb(186, 150, 48)',
        'rgb(131, 207, 56)', 'rgb(232, 39, 120)'
    ]
    sankey = go.Sankey(
        node=dict(
            pad=15,
            thickness=15,
            line=dict(color='blue', width=0.8),
            label=size,
            color=node_colors),
        link=getData(size, color, dff, is_legendary, selected_poke_attr))
    return go.Figure(data=[sankey])
def fig(self):
    """Assemble the cash-flow Sankey figure.

    Groups the cash flow, recomputes edge values into ``self.source``/
    ``self.target``/``self.value``, and returns the styled figure.
    """
    grouped_cash_flow = self._create_grouped_df()
    # Side effect: populates the source/target/value attributes read below.
    self._calc_edge_values(grouped_cash_flow)
    node_spec = dict(
        pad=15,
        thickness=15,
        line=dict(color="black", width=0.5),
        label=self.label,
        color="blue")
    link_spec = dict(
        source=self.source,
        target=self.target,
        value=self.value)
    sankey = go.Sankey(
        arrangement='perpendicular',
        valuesuffix='€',
        valueformat=".2f",
        node=node_spec,
        link=link_spec)
    figure = go.Figure(layout=dict(height=800), data=[sankey])
    figure.update_layout(font_size=10, margin=dict(l=20, r=20, t=10, b=10))
    return figure
value_list.append(links_dict[(s, t)]) elif args.method == "networkx": source_list.append(s) target_list.append(t) else: print "Invalid method: {}".format(method) if args.method == "plotly": import plotly.graph_objects as go fig = go.Figure(data=[ go.Sankey( valueformat=".0f", valuesuffix="s", # Define nodes node=dict(pad=15, thickness=15, line=dict(color="black", width=0.5), label=label_list), # Add links link=dict(source=source_list, target=target_list, value=value_list)) ]) fig.update_layout(title_text="Basic Sankey Diagram", font_size=10) fig.show() elif args.method == "networkx": import networkx as nx from networkx.drawing.nx_agraph import graphviz_layout, to_agraph
def clf_model_retrieval(self, metrics=None):
    """This function implements classification model retrieval visualization.

    Parses ``self.DICT_PREPROCESSING`` into one tidy row per dataset, joins the
    requested metric from ``self.dyna_report``, and renders a Sankey "pipeline
    cluster traversal" diagram for it. Unknown/None ``metrics`` is a no-op.

    Parameters
    ----------
    metrics : str, default = None
        Value in ["accuracy","precision","recall"].

    Example
    -------
    .. [] https://Optimal-Flow.readthedocs.io/en/latest/demos.html#pipeline-cluster-traversal-experiments-model-retrieval-diagram-using-autoviz

    References
    ----------
    """
    columns = [
        "Dataset", "Encode_low_dimension", "Encode_high_dimension",
        "Winsorize", "Scale"
    ]
    df_pp = pd.DataFrame(columns=columns)
    # Parse each preprocessing log string into one row. The original 9-branch
    # if/elif cascade reduces exactly to two independent decisions, keeping the
    # original precedence (onehot over Label; Frequency over Mean).
    for name in list(self.DICT_PREPROCESSING.keys()):
        s = self.DICT_PREPROCESSING[name]
        ext = re.search("Encoded Features:(.*)']", s).group(1)
        if "onehot_" in ext:
            low = 'Low Dim_Onehot'
        elif "Label_" in ext:
            low = 'Low Dim_Label'
        else:
            low = 'Low Dim_No Encoder'
        if "Frequency_" in ext:
            high = 'High Dim_Frequency'
        elif "Mean_" in ext:
            high = 'High Dim_Mean'
        else:
            high = 'High Dim_No Encoder'
        df_pp.loc[len(df_pp)] = [
            name,
            low,
            high,
            re.search('winsor_(.*)-Scaler', s).group(1),
            re.search('-Scaler_(.*)-- ', s).group(1),
        ]

    def _plot_metric(metric_column):
        # Render the Sankey diagram for one dyna_report metric column.
        # FIX: the original "recall" branch referenced a bare `dyna_report`
        # (NameError at runtime); all metrics now use self.dyna_report.
        df_report = df_pp.merge(self.dyna_report[['Dataset', metric_column]],
                                how='left',
                                on='Dataset')
        bins = [0, 0.70, 0.90, 1]
        labels = [
            "Low " + metric_column, "High " + metric_column,
            "Top " + metric_column
        ]
        df_report['Level'] = pd.cut(df_report[metric_column],
                                    bins=bins,
                                    labels=labels)
        df_report['cnt'] = 1
        df_report.loc[df_report['Scale'] == 'None', 'Scale'] = "No Scaler"
        df_report['Scale'] = 'Scale_' + df_report['Scale']
        df_report['Winsorize'] = 'Winsorize_' + df_report['Winsorize']
        # One entry per consecutive pipeline-stage pair:
        # (groupby columns, antecedent column, consequent column, dropna).
        # Groupby key order matches the original so row/label order is kept.
        steps = [
            (['Encode_low_dimension', 'Dataset'],
             'Dataset', 'Encode_low_dimension', False),
            (['Encode_low_dimension', 'Encode_high_dimension'],
             'Encode_low_dimension', 'Encode_high_dimension', False),
            (['Encode_high_dimension', 'Winsorize'],
             'Encode_high_dimension', 'Winsorize', False),
            (['Winsorize', 'Scale'], 'Winsorize', 'Scale', False),
            # `Level` is NaN when the metric fell outside the bins.
            (['Scale', 'Level'], 'Scale', 'Level', True),
        ]
        frames = []
        for group_cols, antecedent, consequent, drop in steps:
            step_df = df_report.groupby(
                group_cols, as_index=False)['cnt'].count().rename(
                    {
                        "cnt": "Total",
                        antecedent: "antecedentIndex",
                        consequent: "consequentIndex"
                    },
                    axis=1)[['antecedentIndex', 'consequentIndex', 'Total']]
            frames.append(step_df.dropna() if drop else step_df)
        integrated_df = pd.concat(frames, axis=0)
        # FIX: Series.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        label_df = pd.DataFrame(
            pd.concat([
                integrated_df['antecedentIndex'],
                integrated_df['consequentIndex']
            ]).drop_duplicates(),
            columns=["label"])
        label_df['Number'] = label_df.reset_index().index
        label_list = list(label_df.label)
        source_df = pd.DataFrame(integrated_df['antecedentIndex']).merge(
            label_df,
            left_on=['antecedentIndex'],
            right_on=['label'],
            how='left')
        source_list = list(source_df['Number'])
        target_df = pd.DataFrame(integrated_df['consequentIndex']).merge(
            label_df,
            left_on=['consequentIndex'],
            right_on=['label'],
            how='left')
        target_list = list(target_df['Number'])
        value_list = [int(i) for i in list(integrated_df.Total)]
        fig = go.Figure(data=[
            go.Sankey(node=dict(pad=15,
                                thickness=10,
                                line=dict(color='rgb(25,100,90)', width=0.5),
                                label=label_list,
                                color='rgb(71,172,55)'),
                      link=dict(source=source_list,
                                target=target_list,
                                value=value_list))
        ])
        fig.update_layout(
            title=
            f'Pipeline Cluster Traversal Experiments - autoViz {metrics} Retrieval Diagram <a href="https://www.linkedin.com/in/lei-tony-dong/"> ©Tony Dong</a>',
            font_size=8)
        plot(fig)
        fig.show()

    # The three original branches were near-identical copies; dispatch on the
    # metric's report-column name instead.
    metric_column = {
        "accuracy": "Accuracy",
        "precision": "Precision",
        "recall": "Recall"
    }.get(metrics)
    if metric_column is not None:
        _plot_metric(metric_column)
def _update_execution_plot(self):
    # type: () -> ()
    """
    Update sankey diagram of the current pipeline

    Lays the DAG out breadth-first (a node is placed only after all its
    parents), builds the sankey node/link arrays plus a summary table, and
    reports the combined figure to the task logger.
    """
    sankey_node = dict(
        label=[],
        color=[],
        hovertemplate='%{label}<extra></extra>',
    )
    sankey_link = dict(
        source=[],
        target=[],
        value=[],
        hovertemplate='%{target.label}<extra></extra>',
    )
    visited = []
    node_params = []
    nodes = list(self._nodes.values())
    while nodes:
        next_nodes = []
        for node in nodes:
            # Defer nodes whose parents have not all been placed yet.
            if not all(p in visited for p in node.parents or []):
                next_nodes.append(node)
                continue
            visited.append(node.name)
            idx = len(visited) - 1
            parents = [visited.index(p) for p in node.parents or []]
            # FIX: the original wrote `node_params.append(...) or {}`, leaving
            # `or {}` as a dead expression; the empty-dict fallback now applies
            # to the appended value itself.
            node_params.append(
                (node.job.task_parameter_override if node.job else node.parameters) or {})
            # Node label: step name plus its parameter key/value pairs.
            sankey_node['label'].append(
                '{}<br />'.format(node.name) +
                '<br />'.join('{}: {}'.format(k, v)
                              for k, v in (node.parameters or {}).items()))
            # Color encodes state: executed ok=blue, failed=red,
            # running=green, pending=lightsteelblue.
            sankey_node['color'].append(
                ("blue" if not node.job or not node.job.is_failed() else "red")
                if node.executed is not None
                else ("green" if node.job else "lightsteelblue"))
            for p in parents:
                sankey_link['source'].append(p)
                sankey_link['target'].append(idx)
                sankey_link['value'].append(1)
        nodes = next_nodes

    # make sure we have no independent (unconnected) nodes
    for i in [n for n in range(len(visited))
              if n not in sankey_link['source'] and n not in sankey_link['target']]:
        sankey_link['source'].append(i)
        sankey_link['target'].append(i)
        sankey_link['value'].append(0.1)

    # FIX: specs now match the add_trace rows below (sankey on row 1, table on
    # row 2); the original declared them in the opposite order.
    fig = make_subplots(
        rows=2, cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        specs=[[{"type": "sankey"}],
               [{"type": "table"}], ]
    )
    # noinspection PyUnresolvedReferences
    fig.add_trace(
        go.Sankey(
            node=sankey_node, link=sankey_link,
            textfont=dict(color='rgba(0,0,0,0)', size=1)
        ),
        row=1, col=1
    )
    # noinspection PyUnresolvedReferences
    fig.add_trace(
        go.Table(
            header=dict(
                values=["Pipeline Step", "Task ID", "Parameters"],
                align="left",
            ),
            cells=dict(
                values=[
                    visited,
                    [self._nodes[v].executed or
                     (self._nodes[v].job.task_id() if self._nodes[v].job else '')
                     for v in visited],
                    [str(p) for p in node_params]],
                align="left")
        ),
        row=2, col=1
    )
    self._task.get_logger().report_plotly(
        title='Pipeline', series='execution flow', iteration=0, figure=fig)
def sankey(df, mapping):
    """Plot a sankey diagram

    It is currently only possible to create this diagram for single years.

    Parameters
    ----------
    df : :class:`pyam.IamDataFrame`
        Data to be plotted
    mapping : dict
        Assigns the source and target component of a variable

        .. code-block:: python

            {
                variable: (source, target),
            }

    Returns
    -------
    fig : :class:`plotly.graph_objects.Figure`

    Raises
    ------
    ValueError
        If any index column other than "variable" has more than one level
        (the diagram only supports a single region/unit/year at a time).
    """
    # Check for duplicates
    for col in [name for name in df._data.index.names if name != "variable"]:
        levels = get_index_levels(df._data, col)
        if len(levels) > 1:
            raise ValueError(f"Non-unique values in column {col}: {levels}")
    # Concatenate the data with source and target columns
    _df = pd.DataFrame.from_dict(
        mapping, orient="index",
        columns=["source", "target"]).merge(df._data,
                                            how="left",
                                            left_index=True,
                                            right_on="variable")
    # FIX: Series.append was removed in pandas 2.0 — pd.concat is the
    # supported way to stack the two label columns.
    label_mapping = {
        label: i
        for i, label in enumerate(set(pd.concat([_df["source"], _df["target"]])))
    }
    _df.replace(label_mapping, inplace=True)
    region = get_index_levels(_df, "region")[0]
    unit = get_index_levels(_df, "unit")[0]
    year = get_index_levels(_df, "year")[0]
    fig = go.Figure(data=[
        go.Sankey(
            valuesuffix=unit,
            node=dict(
                pad=15,
                thickness=10,
                line=dict(color="black", width=0.5),
                label=pd.Series(list(label_mapping)),
                hovertemplate="%{label}: %{value}<extra></extra>",
            color="blue",
            ),
            link=dict(
                source=_df.source,
                target=_df.target,
                value=_df.value,
                # Single-line template; the original's backslash continuation
                # accidentally embedded the next line's indentation in the text.
                hovertemplate='"%{source.label}" to "%{target.label}": %{value}<extra></extra>',
            ),
        )
    ])
    fig.update_layout(title_text=f"region: {region}, year: {year}",
                      font_size=10)
    return fig
def Creating_tracking_and_analyses(Sample = 100, ITN = '5306'): dir_path = os.path.dirname(os.path.realpath(__file__)) # Current directory columns = ['Generator primary NAICS name', 'SRS chemical ID',\ 'Generator condition of use', 'Quantity transferred by generator', 'EoL activity category under TSCA', \ 'EoL activity category under waste management hierarchy', 'RETDF TRIF ID', 'RETDF primary NAICS name', \ 'Maximum amount of chemical present at RETDF', 'Total chemical generated as waste by RETDF', \ 'Environmental compartment', 'RETDF chemical flow releases to the compartment', 'RETDF total chemical release'] type = {'Generator primary NAICS name':'str', 'SRS chemical ID': 'str', 'Generator condition of use':'str', 'Quantity transferred by generator':'float', 'EoL activity category under TSCA': 'str', 'EoL activity category under waste management hierarchy':'str', 'RETDF TRIF ID':'str', 'RETDF primary NAICS name':'str', 'Maximum amount of chemical present at RETDF':'int', 'Total chemical generated as waste by RETDF': 'float', 'Environmental compartment':'str', 'RETDF chemical flow releases to the compartment':'float', 'RETDF total chemical release': 'float'} df = pd.read_csv(dir_path + '/EoL_database_for_MC.csv', sep = ',', usecols = columns, low_memory = False, dtype = type, header = 0) df_chem = df.loc[df['SRS chemical ID'] == ITN] df_sankey = df_chem[['Generator primary NAICS name', 'Generator condition of use', 'Quantity transferred by generator', 'EoL activity category under TSCA', 'EoL activity category under waste management hierarchy', 'RETDF TRIF ID', 'Maximum amount of chemical present at RETDF', 'Total chemical generated as waste by RETDF', 'RETDF total chemical release', 'RETDF primary NAICS name', 'Environmental compartment', 'RETDF chemical flow releases to the compartment']] # First level (GiS -> CoU) df1 = df_sankey[['Generator primary NAICS name', 'Generator condition of use', 'Quantity transferred by generator']] Total_1 = df1['Quantity 
transferred by generator'].sum() df1['Proportion'] = df1['Quantity transferred by generator'].apply(lambda x: 100*x/Total_1) group1 = df1.groupby(['Generator primary NAICS name', 'Generator condition of use'], as_index = False).sum() # Second level (CoU -> RETDFiS) df2 = df_sankey[['Generator condition of use', 'Quantity transferred by generator', 'RETDF primary NAICS name']] Total_2 = df2['Quantity transferred by generator'].sum() df2['Proportion'] = df2['Quantity transferred by generator'].apply(lambda x: 100*x/Total_2) group2 = df2.groupby(['Generator condition of use', 'RETDF primary NAICS name'], as_index = False).sum() # Third level (RETDFiS -> WMH) and Fourth level (RETDFiS -> EoL) df3 = df_sankey[['Quantity transferred by generator', 'EoL activity category under TSCA', 'RETDF primary NAICS name']] Total_3 = df3['Quantity transferred by generator'].sum() df3['Proportion'] = df3['Quantity transferred by generator'].apply(lambda x: 100*x/Total_3) group_aux = df3.groupby(['RETDF primary NAICS name', 'EoL activity category under TSCA'], as_index = False).sum() group3 = group_aux.loc[group_aux['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])] group4 = group_aux.loc[~group_aux['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])] # Fifth leve (EoL -> WMH) df4 = df_sankey[['Quantity transferred by generator', 'EoL activity category under TSCA', 'EoL activity category under waste management hierarchy']] df4 = df4.loc[~df4['EoL activity category under TSCA'].isin(['Energy recovery', 'Recycling'])] df4['Proportion'] = df4['Quantity transferred by generator'].apply(lambda x: 100*x/Total_3) group5 = df4.groupby(['EoL activity category under TSCA', 'EoL activity category under waste management hierarchy'], as_index = False).sum() # Sixth level (WMH -> EC) df5 = df_sankey[['Quantity transferred by generator', 'EoL activity category under waste management hierarchy', 'RETDF TRIF ID', 'Maximum amount of chemical present at 
RETDF', \ 'Total chemical generated as waste by RETDF', 'Environmental compartment', 'RETDF chemical flow releases to the compartment', 'RETDF total chemical release']] n_cols = df5.shape[1] df_handler_facility = df5[['RETDF TRIF ID', 'Maximum amount of chemical present at RETDF', \ 'Total chemical generated as waste by RETDF', 'RETDF total chemical release']] \ .drop_duplicates(keep = 'first') columns_maximum = ['INV MAXIMUM QUANTITY ON-SITE ' + str(N + 1) for N in range(Sample)] df_handler_facility = df_handler_facility.merge(\ df_handler_facility.apply(lambda s: pd.Series(Cal(columns_maximum, s['Maximum amount of chemical present at RETDF'], s['Total chemical generated as waste by RETDF'], s['RETDF total chemical release'])), axis = 1), left_index = True, right_index = True) df5 = pd.merge(df5, df_handler_facility, how = 'left', on = ['RETDF TRIF ID', 'Maximum amount of chemical present at RETDF', 'Total chemical generated as waste by RETDF', 'RETDF total chemical release']) df5_aux = df5.iloc[:,n_cols:].multiply(df5['RETDF chemical flow releases to the compartment'], axis = 'index') df5_aux = df5_aux.multiply(df5['Quantity transferred by generator'], axis = 'index') columns_flow = {'INV MAXIMUM QUANTITY ON-SITE ' + str(N + 1): \ 'RELEASE TO COMPARTMENT ' + str(N + 1) for N in range(Sample)} df5_aux.rename(columns = columns_flow, inplace = True) df5 = pd.concat([df5.iloc[:,0:n_cols], df5_aux], axis = 1) del df5_aux df5['STD RETDF chemical flow releases to the compartment'] = df5.iloc[:,n_cols:Sample + n_cols].std(axis = 1) df5['MEAN RETDF chemical flow releases to the compartment'] = df5.iloc[:,n_cols:Sample + n_cols].mean(axis = 1) df6 = df5[['EoL activity category under waste management hierarchy', 'Environmental compartment', 'MEAN RETDF chemical flow releases to the compartment']] func = {'Quantity transferred by generator': lambda x: 0.25*x.sum(), 'MEAN RETDF chemical flow releases to the compartment': lambda x: x.sum()} df6_aux = df5[['EoL activity 
category under waste management hierarchy', 'Quantity transferred by generator', 'MEAN RETDF chemical flow releases to the compartment']] df6_aux = df6_aux.groupby('EoL activity category under waste management hierarchy', as_index = False).agg(func) df6_aux['MEAN RETDF chemical flow releases to the compartment'] = df6_aux.apply(lambda x: x['Quantity transferred by generator'] - x['MEAN RETDF chemical flow releases to the compartment'], axis = 1) df6_aux['Environmental compartment'] = None lb_wm = {'Recycling': 'Recycled', 'Disposal': 'Discarded', 'Treatment': 'Treated', 'Energy recovery': 'Energy'} for key, value in lb_wm.items(): df6_aux.loc[df6_aux['EoL activity category under waste management hierarchy'] == key, 'Environmental compartment'] = value df6_aux = df6_aux[['EoL activity category under waste management hierarchy', 'Environmental compartment', 'MEAN RETDF chemical flow releases to the compartment']] df6 = pd.concat([df6, df6_aux], axis = 0, ignore_index = True) Total_5 = df6['MEAN RETDF chemical flow releases to the compartment'].sum() df6['Proportion'] = df6['MEAN RETDF chemical flow releases to the compartment'].apply(lambda x: 100*x/Total_5) group6 = df6.groupby(['EoL activity category under waste management hierarchy', 'Environmental compartment'], as_index = False).sum() # Generating labels for Sankey diagram GiS = {val: 'GiS-' + str(idx + 1) for idx, val \ in enumerate(list(group1['Generator primary NAICS name'].unique()))} CoU = {val: 'CoU-' + str(idx + 1) for idx, val \ in enumerate(list(group1['Generator condition of use'].unique()))} RETDFiS = {val: 'RETDFiS-' + str(idx + 1) for idx, val \ in enumerate(list(group2['RETDF primary NAICS name'].unique()))} EoL = {val: 'EoL-' + str(idx + 1) for idx, val \ in enumerate(list(group4['EoL activity category under TSCA'].unique()))} WMH = {val: 'WMH-' + str(idx + 1) for idx, val \ in enumerate(list(group6['EoL activity category under waste management hierarchy'].unique()))} EC = {val: 'EC-' + str(idx + 
1) for idx, val \ in enumerate(list(group6['Environmental compartment'].unique()))} # Saving percentages i = 0 for gr in [group1, group2, group3, group4, group5, group6]: i = i + 1 gr.to_csv(dir_path + '/Percentages_{}_'.format(ITN) + str(i) + '.csv', sep = ',', index = False) # Saving label names TRI = {'Added as a formulation component': 'TRIU-1', 'Used as a chemical processing aid': 'TRIU-2', 'Repackaging': 'TRIU-3', 'Ancillary or other use': 'TRIU-4', 'Produce the chemical': 'TRIU-5', 'Used as a reactant': 'TRIU-6', 'As a process impurity': 'TRIU-7', 'Used as a manufacturing aid': 'TRIU-8', 'Import the chemical': 'TRIU-9', 'Used as an article component': 'TRIU-10'} CoU_aux = {} for key, value in CoU.items(): CoU_aux.update({value: ' + '.join(TRI[e] for e in key.split(' + '))}) j = 0 for l in [GiS, TRI, CoU_aux, RETDFiS, EoL, WMH, EC]: j = j + 1 df_aux = pd.DataFrame({'Col 1': list(l.keys()), 'Col 2': list(l.values())}) df_aux.to_csv(dir_path + '/Label_names_{}_'.format(ITN) + str(j) + '.csv', sep = ',', index = False) # Levels and colors level_1 = list(GiS.values()) colors_1 = ['#ff5050' for i in range(len(level_1))] level_2 = list(CoU.values()) colors_2 = ['#0066cc' for i in range(len(level_2))] level_3 = list(RETDFiS.values()) colors_3 = ['#009933' for i in range(len(level_3))] level_4 = list(EoL.values()) colors_4 = ['#ff944d' for i in range(len(level_4))] level_5 = list(WMH.values()) colors_5 = ['#ffcc66' for i in range(len(level_5))] level_6 = list(EC.values()) colors_6 = ['#6666ff' for i in range(len(level_6))] levels = level_1 + level_2 + level_3 + level_4 + level_5 + level_6 colors = colors_1 + colors_2 + colors_3 + colors_4 + colors_5 + colors_6 Sources = [] Targets = [] Values = [] for index, row in group1.iterrows(): Sources.append(levels.index(GiS[row['Generator primary NAICS name']])) Targets.append(levels.index(CoU[row['Generator condition of use']])) Values.append(row['Proportion']) for index, row in group2.iterrows(): 
Sources.append(levels.index(CoU[row['Generator condition of use']])) Targets.append(levels.index(RETDFiS[row['RETDF primary NAICS name']])) Values.append(row['Proportion']) for index, row in group3.iterrows(): Targets.append(levels.index(WMH[row['EoL activity category under TSCA']])) Sources.append(levels.index(RETDFiS[row['RETDF primary NAICS name']])) Values.append(row['Proportion']) for index, row in group4.iterrows(): Sources.append(levels.index(RETDFiS[row['RETDF primary NAICS name']])) Targets.append(levels.index(EoL[row['EoL activity category under TSCA']])) Values.append(row['Proportion']) for index, row in group5.iterrows(): Sources.append(levels.index(EoL[row['EoL activity category under TSCA']])) Targets.append(levels.index(WMH[row['EoL activity category under waste management hierarchy']])) Values.append(row['Proportion']) for index, row in group6.iterrows(): Sources.append(levels.index(WMH[row['EoL activity category under waste management hierarchy']])) Targets.append(levels.index(EC[row['Environmental compartment']])) Values.append(row['Proportion']) # Sankey diagram fig1 = go.Figure(data=[go.Sankey( node = dict( pad = 35, thickness = 5, line = dict( color = "black", width = 0), label = levels, color = colors ), link = dict( source = Sources, target = Targets, value = Values) )]) fig1.update_layout(plot_bgcolor = '#e8e8e8', paper_bgcolor = '#e8e8e8', width=1000, height=900) fig1.write_image(dir_path + '/Sankey_{}.pdf'.format(ITN)) df_box = df5.iloc[:,[1,5] + list(range(n_cols,Sample + n_cols))] EC_non_cero = list(df_box.loc[~(df_box.iloc[:,2:] == 0.0).all(axis = 1), 'Environmental compartment'].unique()) WMH_non_cero = list(df_box.loc[~(df_box.iloc[:,2:] == 0.0).all(axis = 1), 'EoL activity category under waste management hierarchy'].unique()) df_compartments = {} for compartment in EC_non_cero: df_compartment = pd.DataFrame(columns = ['Management', 'Flow_log', 'Flow']) for management in WMH_non_cero: df_EC_WM = df_box.loc[(df_box['Environmental 
compartment'] == compartment) & \ (df_box['EoL activity category under waste management hierarchy'] == management)] n_times = df_EC_WM.shape[0]*Sample col = [2.20462*l[0] for l in np.reshape(df_EC_WM.iloc[:,2:].to_numpy(), (n_times, 1))] aux = pd.DataFrame(columns = ['Management', 'Flow']) aux['Flow'] = pd.Series(col) aux['Flow_log'] = np.log(aux['Flow']) aux['Management'] = '<b>' + management + '</b>' df_compartment = pd.concat([df_compartment, aux], ignore_index = True, axis = 0) df_compartments.update({compartment:df_compartment}) # Box color_box = ['#009933', '#ffcc66', '#ff944d', '#ff5050'] fig2 = go.Figure() for idx, compartment in enumerate(EC_non_cero): fig2.add_trace(go.Box( y = list(df_compartments[compartment]['Flow_log']), x = list(df_compartments[compartment]['Management']), name = compartment.capitalize(), boxmean = True, whiskerwidth = 0.1, #notchwidth = 0.1, marker = dict( color = color_box[idx] ) )) fig2.update_layout(xaxis = dict(title = '<b>Waste management</b>', zeroline = False), yaxis = dict(title = '<b>Release, log(lb/yr)</b>', zeroline = False), boxmode='group', paper_bgcolor = '#f5f5f5', plot_bgcolor = '#e8e8e8', width = 1500, height = 1000, shapes = [ go.layout.Shape( type = 'line', x0 = -0.5, y0 = np.log(0.5), x1 = 3.5, y1 = np.log(0.5), line = dict( color = '#6666ff', width = 2, dash = 'dot', ), ), go.layout.Shape( type = 'line', x0 = -0.5, y0 = np.log(10.5), x1 = 3.5, y1 = np.log(10.5), line = dict( color = '#6666ff', width = 2, dash = 'dot', ), ), go.layout.Shape( type = 'line', x0 = -0.5, y0 = np.log(499.5), x1 = 3.5, y1 = np.log(499.5), line = dict( color = '#6666ff', width = 2, dash = 'dot', ), ), go.layout.Shape( type = 'line', x0 = -0.5, y0 = np.log(999.5), x1 = 3.5, y1 = np.log(999.5), line = dict( color = '#6666ff', width = 2, dash = 'dot', ), ) ], legend = go.layout.Legend( bgcolor = 'White', bordercolor = '#6666ff', borderwidth = 1 ) ) fig2.update_xaxes(title_font=dict(size=18)) fig2.update_yaxes(title_font=dict(size=20)) 
fig2.write_image(dir_path + '/Box_{}.pdf'.format(ITN)) # Histogram df_histogram = pd.DataFrame(columns = ['Management', 'Flow', 'Environmental compartment']) for compartment in EC_non_cero: df_histogram_aux = df_compartments[compartment][['Management', 'Flow']] df_histogram_aux['Management'] = df_histogram_aux['Management'].apply(lambda x: x.replace('<b>','').replace('</b>','')) df_histogram_aux['Environmental compartment'] = compartment df_histogram = pd.concat([df_histogram, df_histogram_aux], axis = 0) df_histogram['Relese code'] = df_histogram.apply(lambda x: Release_code(x['Flow']), axis = 1) df_histogram['Order'] = df_histogram['Relese code'].apply(lambda x: values(x)) df_histogram.sort_values(by=['Order'], ascending = True, inplace = True) color_box = ['#009933', '#ffcc66', '#ff944d', '#ff5050'] trace = [] for compartment in EC_non_cero: for idx, management in enumerate(WMH_non_cero): data = list(df_histogram.loc[(df_histogram['Environmental compartment'] == compartment) & \ (df_histogram['Management'] == management), 'Relese code']) if len(trace) < len(WMH_non_cero): trace.append(go.Histogram(histnorm = 'probability density', x = data, name = management, marker_color = color_box[idx], opacity = 0.75, autobinx = False)) else: trace.append(go.Histogram(histnorm = 'probability density', x = data, marker_color = color_box[idx], opacity = 0.75, showlegend = False)) n_EC = len(EC_non_cero) n_WMH = len(WMH_non_cero) titles = tuple(ec.capitalize() for ec in EC_non_cero) if n_EC < 4: fig3 = make_subplots(rows = n_EC, cols = 1, shared_xaxes = True, subplot_titles = titles) n_trace = 0 row = 0 for tr in trace: n_trace = n_trace + 1 if (n_trace - 1) % n_WMH == 0: row = row + 1 fig3.append_trace(tr, row, 1) else: fig3 = make_subplots(rows = 2, cols = 2, shared_xaxes = True, subplot_titles = titles) n_trace = 0 row = 1 col = 1 n_fig = 0 for tr in trace: n_trace += 1 if (n_trace - 1) % n_WMH == 0: n_fig += 1 if n_fig == 1: row = 1 col = 1 elif n_fig == 2: row = 1 col = 2 
elif n_fig == 3: row = 2 col = 1 else: row = 2 col = 2 fig3.append_trace(tr, row, col) fig3.update_layout(paper_bgcolor = '#f5f5f5', plot_bgcolor = '#e8e8e8', legend = go.layout.Legend( bgcolor = 'White', bordercolor = '#6666ff', borderwidth = 1)) fig3.write_image(dir_path + '/Histogram_{}.pdf'.format(ITN))
axis=1) movments.rename(columns={'size': 'value'}, inplace=True) pal = sns.color_palette('Set2', len(world) + len(isr_cities)).as_hex() all_names = list(isr_cities) + world label_2_color = {} for i, name in enumerate(all_names): label_2_color[name] = pal[i] label_list = list(layer_1.keys()) + list(layer_2.keys()) + list(layer_3.keys()) fig = go.Figure(data=[ go.Sankey( node=dict(pad=15, thickness=20, line=dict(color="black", width=0.5), label=list(layer_1.keys()) + list(layer_2.keys()) + list(layer_3.keys()), color=[label_2_color[n] for n in label_list]), link=dict( source=movments[ 'source'], # indices correspond to labels, eg A1, A2, A2, B1, ... target=movments['target'], value=movments['value'], #color = [new_pal[i] for i in g['source']] )) ]) #fig.write_image(r"/Users/daniellemiller/Google Drive/covid19/paper/COVID19_phylodynamics/figures/transmission_patterns.png", scale=10, width=1000) fig.show()
data['data'][0]['node']['color'] = [ 'rgba(255,0,255, 0.8)' if color == "magenta" else color for color in data['data'][0]['node']['color'] ] data['data'][0]['link']['color'] = [ data['data'][0]['node']['color'][src].replace("0.8", str(opacity)) for src in data['data'][0]['link']['source'] ] fig = go.Figure(data=[ go.Sankey( valueformat=".0f", #valuesuffix = "TWh", # Define nodes node=dict(pad=15, thickness=15, line=dict(color="black", width=0.5), label=data['data'][0]['node']['label'], color=data['data'][0]['node']['color']), # Add links link=dict(source=data['data'][0]['link']['source'], target=data['data'][0]['link']['target'], value=data['data'][0]['link']['value'], label=data['data'][0]['link']['label'], color=data['data'][0]['link']['color'])) ]) fig.update_layout(title_text="Imagined CreAItures", font_size=14) fig.show()
def new_cnx_convergence(kw_pairs, cat, year, mode):
    """Track how the graph distance of keyword pairs evolves over the years.

    For every pair in *kw_pairs* the shortest-path distance is recorded per
    year chunk; the year-to-year distance transitions are then rendered as a
    Sankey diagram and written to an HTML file.

    Args:
        kw_pairs: iterable of (node, node) keyword pairs.
        cat: category used to locate the per-year graph files.
        year: year of connection (used in the title and output filename only).
        mode: tag used in the output filename.
    """
    # Distance classes per year:
    #   "not in graph"  - at least one keyword not yet in the graph
    #   "unconnected"   - both present but no path between them
    #   "0".."THRESHOLD-1" and ">=THRESHOLD" - actual shortest-path lengths
    distances = ["not in graph", "unconnected"
                 ] + [str(i) for i in range(THRESHOLD)] + [">=" + str(THRESHOLD)]
    # All (year, distance) combinations are consecutively numbered so that
    # label index = year_index * len(distances) + distance_index.
    labels = [d + " : " + str(y) for y in YEARS for d in distances]

    # pair_dists[p] collects, for pair p, one label index per year chunk.
    pair_dists = [list() for _ in range(len(kw_pairs))]
    for year_idx, y_chunk in enumerate(YEARS):
        graph = build_graph(GRAPH_FILES.format(cat, y_chunk))
        # NOTE: the inner loop variable must not shadow the year index — the
        # label offset below depends on the year, not on the pair.
        for pair_idx, (n1, n2) in enumerate(kw_pairs):
            if n1 not in graph or n2 not in graph:
                d = 0
            elif not nx.has_path(graph, n1, n2):
                d = 1
            else:
                path = nx.shortest_path_length(graph, n1, n2)
                if path < THRESHOLD:
                    # first two indices are taken by 'not in graph' / 'unconnected'
                    d = 2 + path
                else:
                    d = 2 + THRESHOLD
            # Save the index matching the right entry in 'labels'.
            # BUGFIX: this used the (shadowed) pair index as the year offset,
            # which mapped distances onto the wrong year's labels.
            pair_dists[pair_idx].append(year_idx * len(distances) + d)

    # Convert to plotly Sankey input: each (source label, target label) pair
    # gets one link whose value counts how many pairs made that transition.
    dist_changes = {"source": list(), "target": list(), "value": list()}
    transfer_indices = dict()  # position of each source-target pair in dist_changes
    for pair_dist in pair_dists:
        for dist in range(len(pair_dist) - 1):
            source_target = (pair_dist[dist], pair_dist[dist + 1])
            if source_target not in transfer_indices:
                # get next free position
                transfer_indices[source_target] = len(transfer_indices)
                dist_changes["source"].append(source_target[0])
                dist_changes["target"].append(source_target[1])
                dist_changes["value"].append(0)
            dist_changes["value"][transfer_indices[source_target]] += 1

    # Save results
    fig = go.Figure(data=[
        go.Sankey(
            node=dict(pad=15,
                      thickness=20,
                      line=dict(color="black", width=0.5),
                      label=labels,
                      color="blue"),
            link=dict(source=dist_changes["source"],
                      target=dist_changes["target"],
                      value=dist_changes["value"]))
    ])
    fig.update_layout(
        title_text=
        "Distance of {} sampled keyword pairs that get connected in {}".format(
            len(kw_pairs), year),
        font_size=10)
    out_path = PLOT_DIR.format("pairs_before_cnx_{}_{}_{}.html".format(
        cat, year, mode))
    plotly.io.write_html(fig, out_path, include_plotlyjs="cdn")
    print("Saved as " + out_path)
for i, col in enumerate(cols): if i != len(cols) - 1: n_col = cols[i + 1] for col_un in imp_dfs_cl[col].unique(): for n_col_un in imp_dfs_cl[n_col].unique(): if (col_un in list(nodes['label'])) and (n_col_un in list(nodes['label'])): source.append(nodes[nodes['label'] == col_un].index[0]) target.append(nodes[nodes['label'] == n_col_un].index[0]) values.append(imp_dfs_cl.loc[(imp_dfs_cl[col] == col_un) & (imp_dfs_cl[n_col] == n_col_un)][to_group].sum()) print(f"> Renaming columns") nodes = nodes.replace(rename_mask) print(f"> Creating sankey diagram with {len(source)} sources, {len(target)} targets and {len(values)} values") fig = go.Figure(data=[go.Sankey( node=dict( pad=15, thickness=20, line=dict(color="black", width=0.5), label=nodes['label'], # ["A1", "A2", "B1", "B2", "C1", "C2"], color='#245C7C' ), link=dict( source=source, target=target, value=values ))]) fig.update_layout(title_text=f"Summed {to_group.title()}", font_size=10) offline.plot(fig, filename='Content/sankey.html') print("Finished")
names.append(s.replace('MRS_TX_3', 'mRS_90days')) else: names.append(s) source = [] target = [] value = [] for i in ['0', '1', '2', '3', '4', '5', '6']: for j in ['0', '1']: source.append(label.index('MRS_TX_1_'+i)) target.append(label.index('MRS_TX_3_'+j)) value.append(data[(data['MRS_TX_1_'+i] == 1) & (data['MRS_TX_3_'+j] == 1)].shape[0]) fig = go.Figure(data=[go.Sankey( node = dict( pad = 15, thickness = 20, line = dict(color = "black", width = 0.5), label = names ), link = dict( source = source, # indices correspond to labels, eg A1, A2, A2, B1, ... target = target, value = value ))]) fig.update_layout(title_text="", font_size=10) fig.show()
def sankey_diagram(self, output_file):
    """Build the cohort pathway Sankey diagram and write it to *output_file*.

    Patients (keyed by ``name_index`` in ``self._df``) are partitioned into
    ICU / non-ICU admissions and then into poor/mild outcomes, death, and
    discharge; each partition size becomes one link of the diagram.
    """
    node_labels = [
        "Hospitalisation", "ICU Admission", "Non-ICU Wards", "Poor Outcomes",
        "Mild Outcomes", "Death", "Discharged"
    ]
    df = self._df
    everyone = set(df['name_index'].tolist())
    icu = set(df[df['days_in_icu'].notnull()]['name_index'].tolist())
    non_icu = everyone - icu
    poor = set(df[df['poor_prognosis'] > 0]['name_index'].tolist())
    mild = everyone - poor  # kept for parity with the original (not consumed below)
    dead = set(df[df['death'] > 0]['name_index'].tolist())

    # ICU branch outcomes
    icu_to_death = (dead & icu) - poor
    icu_to_poor = poor & icu
    icu_to_mild = icu - icu_to_poor - icu_to_death
    # non-ICU branch outcomes
    nonicu_to_poor = poor - icu
    nonicu_to_death = dead - poor - icu
    nonicu_to_mild = everyone - nonicu_to_death - nonicu_to_poor - icu
    # terminal stages
    mild_to_discharge = icu_to_mild | nonicu_to_mild
    poor_to_death = poor & dead
    poor_to_discharge = poor - poor_to_death

    # (source label, target label, patient count) in diagram order
    flows = [
        ("Hospitalisation", "ICU Admission", len(icu)),
        ("Hospitalisation", "Non-ICU Wards", len(non_icu)),
        ("Non-ICU Wards", "Poor Outcomes", len(nonicu_to_poor)),
        ("Non-ICU Wards", "Death", len(nonicu_to_death)),
        ("Non-ICU Wards", "Mild Outcomes", len(nonicu_to_mild)),
        ("ICU Admission", "Poor Outcomes", len(icu_to_poor)),
        ("ICU Admission", "Death", len(icu_to_death)),
        ("ICU Admission", "Mild Outcomes", len(icu_to_mild)),
        ("Mild Outcomes", "Discharged", len(mild_to_discharge)),
        ("Poor Outcomes", "Death", len(poor_to_death)),
        ("Poor Outcomes", "Discharged", len(poor_to_discharge)),
    ]

    # translucent link colours keyed by destination kind
    colors = {
        "toICU": "rgba(245, 182, 66, 0.2)",
        "toPoor": "rgba(240, 151, 139, 0.2)",
        "toNonICU": "rgba(124, 159, 191, 0.2)",
        "toDeath": "rgba(105, 81, 78, 0.2)",
        "toMild": "rgba(124, 191, 144, 0.2)",
        "toDischarge": "rgba(124, 191, 144, 0.2)",
        "toDischarge2": "rgba(54, 153, 131, 0.2)"
    }
    # one colour key per entry of `flows`, in the same order
    link_color_keys = [
        'toICU', 'toNonICU', 'toPoor', 'toDeath', 'toDischarge', 'toPoor',
        'toDeath', 'toMild', 'toDischarge', 'toDeath', 'toDischarge2'
    ]

    sources = [node_labels.index(src) for src, _, _ in flows]
    targets = [node_labels.index(dst) for _, dst, _ in flows]
    counts = [n for _, _, n in flows]

    fig = go.Figure(data=[
        go.Sankey(node=dict(pad=15,
                            thickness=20,
                            line=dict(color="black", width=0.5),
                            label=node_labels,
                            color=[
                                "#f5bc42", "#f57b42", '#425b96', "#f55742",
                                "#829bba", "#61352f", "#37b05b"
                            ]),
                  link=dict(source=sources,
                            target=targets,
                            value=counts,
                            color=[colors[k] for k in link_color_keys]))
    ])
    fig.update_layout(title_text="Pathways of China Cohort", font_size=10)
    fig.write_image(output_file)
def plot_sankey(all_results, live=False, filename="communities_sankey.html", time_index=None): """Plot Sankey diagram of communities accros time (plotly only). Args: all_results (dict): results from run function live (bool): if True, interactive figure will appear in browser filename (str): filename to save the plot time_index (bool): plot time of indices """ import plotly.graph_objects as go from plotly.offline import plot as _plot sources = [] targets = [] values = [] shift = 0 if not time_index: all_results["community_id_reduced"] = all_results["community_id"] else: all_results["community_id_reduced"] = [ all_results["community_id"][i] for i in time_index ] for i in range(len(all_results["community_id_reduced"]) - 1): community_source = np.array(all_results["community_id_reduced"][i]) community_target = np.array(all_results["community_id_reduced"][i + 1]) source_ids = set(community_source) target_ids = set(community_target) for source in source_ids: for target in target_ids: value = sum( community_target[community_source == source] == target) if value > 0: values.append(value) sources.append(source + shift) targets.append(target + len(source_ids) + shift) shift += len(source_ids) layout = go.Layout(autosize=True) fig = go.Figure( data=[ go.Sankey( node=dict( pad=1, thickness=1, line=dict(color="black", width=0.0), ), link=dict(source=sources, target=targets, value=values), ) ], layout=layout, ) _plot(fig, filename=filename) if live: fig.show()
def plot_factor_diagram(factor):
    """Render the dependency tree of *factor* as a plotly Sankey diagram.

    Walks the factor graph depth-first: every factor becomes one node, every
    input/mask edge becomes one link whose value is the parent's window size.
    Shared sub-factors are deduplicated by object identity.
    """
    import plotly.graph_objects as go
    from .factor import BaseFactor, CustomFactor
    from .datafactor import DataFactor
    # Node colour palette (plotly default colours at 0.8 alpha, with one
    # irregular "magenta" entry); cycled below so node count is not limited
    # by the literal length.
    color = [
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "magenta",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)",
        "rgba(188, 189, 34, 0.8)", "rgba(23, 190, 207, 0.8)",
        "rgba(31, 119, 180, 0.8)", "rgba(255, 127, 14, 0.8)",
        "rgba(44, 160, 44, 0.8)", "rgba(214, 39, 40, 0.8)",
        "rgba(148, 103, 189, 0.8)", "rgba(140, 86, 75, 0.8)",
        "rgba(227, 119, 194, 0.8)", "rgba(127, 127, 127, 0.8)"
    ]
    factor_id = dict()  # id(obj) -> node index; dedupes shared subtrees
    label = []          # node labels, index-aligned with source/target ids
    source = []
    target = []
    value = []
    line_label = []     # per-link label: 'inputs' or 'mask'

    def add_node(this, parent_label_id, parent_label, parent_win):
        # Register `this` as a node (once) and always link it to its parent.
        class_id = id(this)
        if class_id in factor_id:
            this_label_id = factor_id[class_id]
        else:
            this_label_id = len(label)
            if isinstance(this, DataFactor):
                # data factors are labelled by their input column name
                label.append(this.inputs[0])
            else:
                label.append(type(this).__name__)
        if parent_label_id is not None:
            source.append(parent_label_id)
            target.append(this_label_id)
            value.append(parent_win)
            line_label.append(parent_label)
        if class_id in factor_id:
            # already expanded: the link above was added, but do not recurse again
            return
        if isinstance(this, CustomFactor):
            this_win = this.win
        else:
            this_win = 1
        factor_id[class_id] = this_label_id
        if isinstance(this, CustomFactor):
            if this.inputs:
                for upstream in this.inputs:
                    if isinstance(upstream, BaseFactor):
                        add_node(upstream, this_label_id, 'inputs', this_win)
            if this._mask is not None:
                add_node(this._mask, this_label_id, 'mask', this_win)

    add_node(factor, None, None, None)
    fig = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            valuesuffix="win",
            node=dict(pad=15,
                      thickness=15,
                      line=dict(color="black", width=0.5),
                      label=label,
                      # cycle/islice are itertools names imported at module
                      # level; the palette wraps if there are more nodes
                      color=list(islice(cycle(color), len(label)))),
            # Add links
            link=dict(
                source=source, target=target, value=value, label=line_label))
    ])
    fig.update_layout(title_text="Factor Diagram")
    fig.show()
e2 = df['estado2'].value_counts().sort_index().values e1 = e1/e1.sum()*100 e2 = e2/e2.sum()*100 values = np.concatenate((e1, e2), axis=None).tolist() fig = go.Figure(data=[go.Sankey( node = dict( pad = 15, thickness = 20, line = dict(color = "black", width = 0.5), label = ["necesita aprobación", "no necesita aprobación", "obtuvo aprobación", "no obtuvo aprobación", "oportunidad ganada", "oportunidad perdida"], #color= ['LightSkyBlue', 'MediumPurple', "blue", "blue", "blue", "blue", "red"] #color = "blue" ), link = dict( source = [0, 0, 1, 2, 2, 3, 3], # indices correspond to labels, eg A1, A2, A2, B1, ... target = [2, 3, 3, 4, 5, 4, 5], #value = [9890, 3803, 3254, 6404, 3971, 3443, 3129] value = values ))]) fig.update_layout( title={ 'text': "Analisis de aprobación de precio total de oportunidades", 'y':0.9, 'x':0.5, 'xanchor': 'center', 'yanchor': 'top'},
max_len = max(map(len, next_level_numbers)) node_label.append(f'{label}{"X" * max_len}') if max_len < 2: continue if next_level_numbers and parent_node_id is not None: links_source.append(parent_node_id) link_target.append(node_id) link_value.append(len(next_level_numbers)) add_to_sankey(next_level_numbers, parent_node_id=node_id) add_to_sankey(numbers=dns) fig = go.Figure(data=[ go.Sankey(node=dict(pad=15, thickness=20, line=dict(color='black', width=0.5), label=node_label, color='blue'), link=dict(source=links_source, target=link_target, value=link_value)) ]) fig.update_layout(title_text='Sankey', font_size=10) fig.show() @menu_register('Find intra-site dialing translation patterns') def menu_find_intrasite_translation_patterns(self): tps = self.proxy.translation_pattern.list print(f'Found {len(tps)} translation patterns') tp_by_len: Dict[int, List[TranslationPattern]] = defaultdict(list) names = [] parents = [] for tp in tps:
] # AI ML Fradude dete Recommendation Clustering NLP image recognition medical diagn arbitrage forecasting predictive main epidemiology source = [ 0, 0, 0, 1, 1, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 4, 5, 6, 7, 8, 9, 3, 3, 3, 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, 20, 20, 20, 20, 22, 22 ] target = [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 19, 19, 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, 20, 21, 22, 10, 11, 12, 15, 16, 17, 18, 10, 11, 12, 15, 16, 17, 18, 11, 18 ] value = [ 20, 0, 20, 6, 6, 3, 1, 2, 2, 0.8, 1.45, .6, 0, .4, .33, 0.8, 1.45, 0, 0, 0, .33, .2, 1.45, 0, 0, 0, 0, 0.8, 0, .6, 0, 0, 0, 0.8, 0, .6, .9, .4, 0, 0.8, 0, .6, 0, .4, .33, 0.8, 1.45, 0, 0, .4, 0, 0.4, 0, .3, .1, .2, .15, 0.4, 0, .3, .1, .2, .15, 0.2, 0.2, 0, 0, 0, .33, 6, 8, 6, 8 / 7, 8 / 7, 8 / 7, 8 / 7, 8 / 7, 8 / 7, 8 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 6 / 7, 3, 3 ] # data to dict, dict to sankey link = dict(source=source, target=target, value=value) node = dict(label=label, pad=50, thickness=5) data = go.Sankey(link=link, node=node) # plot fig = go.Figure(data) plot(fig)
# Sankey Diagram html.Div([ html.H1(children='Sankey Diagram for Pokemon Dataset', style={'textAlign': 'center'}), dcc.Graph( id='snakey_diagram', figure=go.Figure(data=[ go.Sankey( valueformat=".0f", valuesuffix=" pokemon", # Define nodes node=dict( pad=15, thickness=15, #line = dict(color = "black", width = 0.5), label=cat_list, color='darkcyan'), # Add links link=dict(source=sources, target=targets, value=values, color=flow_color #label = data['data'][0]['link']['label'] )) ])), ]) ]) # update the k-means graph @app.callback(Output('kmeans_with_k', 'figure'),
- not providing an indication of both number of subjects and number of ROIs. """] fig1 = go.Figure(data=[go.Sankey( arrangement = "freeform", node = dict( pad = 80, thickness = 10, line = dict(color = "black", width = 0.5), label = ["Main records identified (database searching)", "Additional records (reviews)", "Records screened", "Records excluded", "Full-text articles assessed for eligibility", "Full-text articles excluded", "Studied included in the literature overview", "Studies included in the meta-analysis"], x = [0, 0, 0.4, 0.6, 0.5, 0.8, 0.7, 1], y = [0, 0, 0.5, 0.8, 0.15, 0.05, 0.4, 0.6], hovertemplate = "%{label}<extra>%{value}</extra>", color = ["darkblue","darkblue","darkblue","darkred","darkgreen","darkred","darkgreen","darkgreen"] ), link = dict( source = [0, 1, 2, 2, 4, 4, 6], target = [2, 2, 3, 4, 5, 6, 7], value = [688, 1, 597, 92, 34, 58, 43], customdata = screening_info, hovertemplate = "%{customdata}", ))]) fig1.update_layout(title = dict(text="Figure 1 - Review methodology"), width=650,
def SANK(data):
    """Build a Sankey diagram of category/bin co-occurrence from a request dict.

    Observation annotations (and, when genes are selected, binned expression
    values) become node groups; links count cells sharing each pair of labels
    across consecutive columns of data['sankOrder'].  Returns either a
    base64-encoded image (when 'imgSave' is requested) or an HTML div.

    NOTE(review): relies on module-level helpers (updateGene, getObs,
    createData, sns, plotIO, go, pd, base64) — behavior of those is assumed,
    not visible here.
    """
    updateGene(data)
    if len(data['genes']) == 0:
        # no genes selected: use the categorical observation table only
        tmp, D = getObs(data)
        # prefix every value with its column name, e.g. "cluster:3"
        D = D.apply(lambda x: x.apply(lambda y: x.name + ":" + y))
    else:
        adata = createData(data)
        # categorical obs columns plus gene expression cut into 'sankBin'
        # interval labels of the form "gene:low_high"
        D = pd.concat([
            adata.obs.apply(lambda x: x.apply(lambda y: x.name + ":" + y)),
            adata.to_df().apply(lambda x: pd.cut(x, int(data['sankBin'])).apply(
                lambda y: x.name + ":" + '%.1f_%.1f' % (y.left, y.right)))
        ], axis=1, sort=False)
    D = D.astype('str').astype('category')
    if 'name_0' in D.columns:
        del D['name_0']

    # Assign a colour to every node label: small groups alternate between two
    # qualitative palettes, large groups (>=10 levels) use viridis.
    colName = ['Set1', 'Set3', 'viridis']
    labels = []
    cols = []
    colindex = 0
    for gID in D.columns:
        gNames = list(D[gID].unique())
        labels += gNames
        if len(gNames) < 10:
            cols += sns.color_palette(colName[colindex % 2], len(gNames)).as_hex()
            colindex += 1
        else:
            cols += sns.color_palette(colName[2], len(gNames)).as_hex()

    # Build links between each consecutive pair of columns in 'sankOrder':
    # one link per co-occurring label pair, weighted by cell count.
    sIDs = []
    dIDs = []
    v = []
    Dnames = data['sankOrder']  # column plotting order (subset of D.columns)
    for i in range(len(Dnames) - 1):
        oneName = Dnames[i:i + 2]
        summaryOne = D.groupby(oneName).size().reset_index(name='Count')
        summaryOne = summaryOne[summaryOne['Count'] > 0]
        sIDs += list(summaryOne[oneName[0]].apply(lambda x: labels.index(x)))
        dIDs += list(summaryOne[oneName[1]].apply(lambda x: labels.index(x)))
        v += list(summaryOne['Count'])

    data_trace = dict(
        type='sankey',
        domain=dict(x=[0, 1], y=[0, 1]),
        orientation='h',
        valueformat=".0f",
        node=dict(
            pad=10,
            thickness=15,
            line=dict(
                color="black",
                width=0.5
            ),
            label=labels,
            color=cols
        ),
        link=dict(
            source=sIDs,
            target=dIDs,
            value=v
        )
    )
    ## if the image is requested
    if 'imgSave' in data.keys():
        layout = dict(
            font=dict(size=int(data['figOpt']['fontsize'])),
            height=int(data['imgH']),
            width=int(data['imgW']) * D.shape[1]
        )
        fig = go.Figure(data=[go.Sankey(data_trace)], layout=layout)
        img = plotIO.to_image(fig, data['imgSave'])
        return base64.encodebytes(img).decode('utf-8')

    # interactive HTML output with restyle menus (thickness / pad /
    # arrangement / orientation)
    layout = dict(
        font=dict(size=int(data['figOpt']['fontsize'])),
        height=int(data['imgH']),
        width=int(data['imgW']) * D.shape[1],
        updatemenus=[
            dict(
                y=0.9,
                buttons=[
                    dict(
                        label='Thick',
                        method='restyle',
                        args=['node.thickness', 15]
                    ),
                    dict(
                        label='Thin',
                        method='restyle',
                        args=['node.thickness', 8]
                    )
                ]
            ),
            dict(
                y=0.8,
                buttons=[
                    dict(
                        label='Small gap',
                        method='restyle',
                        args=['node.pad', 15]
                    ),
                    dict(
                        label='Large gap',
                        method='restyle',
                        args=['node.pad', 20]
                    )
                ]
            ),
            dict(
                y=0.7,
                buttons=[
                    dict(
                        label='Snap',
                        method='restyle',
                        args=['arrangement', 'snap']
                    ),
                    dict(
                        label='Perpendicular',
                        method='restyle',
                        args=['arrangement', 'perpendicular']
                    ),
                    dict(
                        label='Freeform',
                        method='restyle',
                        args=['arrangement', 'freeform']
                    ),
                    dict(
                        label='Fixed',
                        method='restyle',
                        args=['arrangement', 'fixed']
                    )
                ]
            ),
            dict(
                y=0.6,
                buttons=[
                    dict(
                        label='Horizontal',
                        method='restyle',
                        args=['orientation', 'h']
                    ),
                    dict(
                        label='Vertical',
                        method='restyle',
                        args=['orientation', 'v']
                    )
                ]
            )
        ]
    )
    fig = go.Figure(data=[go.Sankey(data_trace)], layout=layout)
    div = plotIO.to_html(fig)
    return div  # [div.find('<div>'):(div.find('</div>')+6)]
def sankey():
    """Draw the basic five-state transition Sankey diagram.

    Sources, targets and values come from node_trace(); nodes are one root
    ("0") plus state1..state5 repeated over five stages, and colours follow
    a fixed five-colour cycle.
    """
    result = node_trace()

    state_labels = ["state1", "state2", "state3", "state4", "state5"]
    # one colour per state, 0.8 alpha (the first palette entry has no
    # positional relationship to the first label)
    node_palette = [
        "rgba(82,12,172,0.8)",
        "rgba(190,190,190,0.8)",
        "rgba(6,130,196,0.8)",
        "rgba(30,122,51,0.8)",
        "rgba(156,9,13,0.8)",
    ]
    # link colours: each state colour at 0.5 alpha except the red one,
    # which stays at 0.8; five links per state, over four stage transitions
    link_palette = [
        "rgba(82,12,172,0.5)",
        "rgba(190,190,190,0.5)",
        "rgba(6,130,196,0.5)",
        "rgba(30,122,51,0.5)",
        "rgba(156,9,13,0.8)",
    ]

    fig = go.Figure(data=[
        go.Sankey(
            valueformat=".0f",
            node=dict(
                pad=15,
                thickness=20,
                line=dict(color="black", width=0.5),
                label=["0"] + state_labels * 5,
                color=["rgba(0,0,0,0.8)"] + node_palette * 5,
            ),
            link=dict(
                source=result[0],  # indices correspond to labels
                target=result[1],
                value=result[2],
                # 5 repeats of each colour x 5 colours x 4 transitions = 100
                color=[c for c in link_palette for _ in range(5)] * 4,
            ),
        )
    ])
    fig.update_layout(title_text="Basic Sankey Diagram", font_size=10)
    fig.show()
def _log_plot(log_data, savetofile, checkpoint_lines: List[str], pipeline_sections):
    """Render two artifacts from a parsed log.

    1. A matplotlib table of "important" checkpoint timings, saved to
       ``savetofile`` (via ``savetofile.abspath``).
    2. A plotly Sankey diagram of the nested pipeline sections, written next to
       ``savetofile`` with a ``_sankey`` suffix and an ``.html`` extension.

    Args:
        log_data: sequence of ``(line_no, file_line, t)`` triples; the ``t`` of
            the last triple is taken as the total run time.
        savetofile: project path object (exposes ``.abspath``, ``res_pre_ext``,
            ``resrepext`` via ``File``) used for both outputs.
        checkpoint_lines: substrings searched for in each log line; every match
            records that line's timestamp in the table.
        pipeline_sections: mapping of section name -> dict with ``start``,
            ``end``, ``thread``, ``process`` and ``pid`` keys. The first
            section is treated as the root spanning the whole run.
    """
    import matplotlib.pyplot as plt  # 1 SECOND IMPORT (deferred so module import stays fast)

    # Collect (checkpoint label, timestamp) pairs for every log line that
    # contains one of the requested checkpoint substrings.
    important_text = []
    important_time = []
    for lin, file_line, t in log_data:
        for cl in checkpoint_lines:
            if cl in file_line:
                important_text.append(cl)
                important_time.append(t)

    @dataclass
    class LoggedPipelineSection:
        """One timed pipeline section, later laid out as a Sankey node."""
        start: float
        end: float
        thread: str
        process: str
        pid: str
        label: str
        # subsections: Optional[List] = None # does do anything yet
        index: int = -1  # MUST SET LATER (position in loggedSections)
        source: Optional[int] = None  # might set later (index of enclosing section)
        sourceSec: Optional["LoggedPipelineSection"] = None  # enclosing section object, if any
        x: int = 0  # set later (nesting depth, then normalized to [0, 1])
        time_amount: Optional[float] = None  # duration: end - start
        time_rel: Optional[float] = None
        time_amount_rel: Optional[float] = None  # duration relative to the root section
        y_center: Optional[float] = None  # vertical node position (fraction of root span)
        color: str = 'orange'

    # Keep only sections with both a recorded start and end (truthy values).
    loggedSections = []
    for sec, v in listitems(pipeline_sections):
        if v['start'] and v['end']:
            loggedSections.append(LoggedPipelineSection(
                start=v['start'],
                end=v['end'],
                label=sec,
                thread=v['thread'],
                pid=v['pid'],
                process=v['process']
            ))

    # Total run time = timestamp of the last log entry.
    total = log_data[-1][2]
    important_text = [shorten_str(s, 20) for s in important_text]

    # --- matplotlib table of checkpoint timings ----------------------------
    fig, axs = plt.subplots(nrows=1)
    table_ax = axs
    table_ax.set_axis_off()
    important_time = [round(t, 2) for t in important_time]
    if important_time:
        table = table_ax.table(
            cellText=[[str(t)] for t in important_time],
            rowLabels=important_text,
            colLabels=['time'],
            rowColours=["palegreen"] * (len(important_text) + 1),
            colColours=["palegreen"] * 2,
            colWidths=[0.5, 0.5],
            cellLoc='center',
            loc='center'
        )
        table_ax.set_title('Important Logs', fontweight="bold")

    # Per-checkpoint durations and relative positions.
    # NOTE(review): time_amounts/time_rels/time_amount_rels/y_centers (and the
    # colors list below) are computed but never read afterwards — they look
    # like leftovers from an earlier chart; confirm before removing.
    time_amounts = []
    time_rels = []
    time_amount_rels = []
    y_centers = []
    last = 0
    for t in important_time:
        time_amounts.append(t - last)
        time_rels.append(t / total)
        time_amount_rels.append(time_amounts[-1] / total)
        y_centers.append(time_rels[-1] - (time_amount_rels[-1] / 2))
        last = t
    sizes = important_time

    # --- section layout: durations and vertical centers --------------------
    # The first logged section is used as the root; everything is scaled to it.
    loggedSectionsTotal = loggedSections[0].end - loggedSections[0].start
    for i, sec in enum(loggedSections):
        sec.time_amount = sec.end - sec.start  # no need for time_rel?
        sec.time_amount_rel = sec.time_amount / loggedSectionsTotal
        sec.y_center = (((sec.end - (sec.time_amount / 2)) - loggedSections[0].start) / loggedSectionsTotal)
        sec.index = i
    loggedSections[0].y_center = 0.5  # pin the root to the vertical middle

    # For each section, find the innermost section that encloses it:
    #   candidates  = sections that started earlier,
    #   candidates2 = of those, the ones that also end later (true enclosers).
    # The encloser with the latest start is the direct parent (source).
    for sec in loggedSections:
        candidates = []
        for secsec in loggedSections:
            if sec.start > secsec.start:
                candidates.append(secsec)
        candidates2 = []
        for cand in candidates:
            if sec.end < cand.end:
                candidates2.append(cand)
            # NOTE(review): 'secsec' below is the leftover variable from the
            # previous loop, not 'cand' — harmless since the branch only
            # passes, but the overlap check was probably meant to use cand.end.
            elif sec.start > secsec.end:
                pass  # OVERLAP!
                # assert sec.start > secsec.end # throws error if there is overlap but not nesting
        if candidates2:
            secsec = max(candidates2, key=lambda x: x.start)
            sec.source = secsec.index
            sec.sourceSec = secsec

    def count_recurse(sec):
        # Nesting depth: number of parents up to the root.
        if sec.sourceSec:
            return 1 + count_recurse(sec.sourceSec)
        else:
            return 0

    for sec in loggedSections:
        sec.x = count_recurse(sec)

    # Cycle the palette until it covers every checkpoint row, then truncate.
    # (Unused afterwards — see the review note above.)
    colors = ['gold', 'yellowgreen', 'lightcoral', 'lightskyblue']
    while len(colors) < len(sizes):
        colors = colors + colors
    colors = colors[:len(sizes)]

    # Save the table figure only when there was something to tabulate.
    if important_text:
        plt.savefig(savetofile.abspath)
    plt.clf()

    # Normalize nesting depth to x in [0, 1].
    # NOTE(review): raises ZeroDivisionError if every section has depth 0
    # (i.e. only the root section was logged) — confirm callers guarantee
    # at least one nested section.
    maxX = max([sec.x for sec in loggedSections])
    xstep = normX = 1 / maxX  # normX appears unused below
    for sec in loggedSections:
        sec.x = sec.x / maxX

    labels = [sec.label for sec in loggedSections]
    # Link values: durations of all non-root sections, in list order.
    values = [sec.time_amount for sec in loggedSections if sec.source is not None]
    if True:
        # Append a formatted duration to every label (root uses its own
        # time_amount; the rest reuse the values list, offset by one).
        for i in itr(labels):
            if i > 0:
                labels[i] = labels[i] + f' ({format_sec_dur(values[i - 1])})'
        labels[0] = labels[0] + f' ({format_sec_dur(loggedSections[0].time_amount)})'

    # Nudge apart nodes that share an x but belong to different
    # threads/processes, coloring the moved one blue.
    # NOTE(review): keepJittering is set False unconditionally after the for
    # loop, so the while body executes exactly once (at most one pair is
    # jittered) — if repeated jittering was intended, this looks like a bug.
    jitter_step = xstep / 10
    keepJittering = True
    while keepJittering:
        for sec, secsec in unique_pairs(loggedSections):
            if sec.x == secsec.x:
                if sec.thread != secsec.thread or sec.process != secsec.process or sec.pid != secsec.pid:
                    secsec.color = 'blue'
                    secsec.x += jitter_step
                    break
        keepJittering = False

    # --- plotly Sankey of the section hierarchy ----------------------------
    import plotly.graph_objects as go
    fig = go.Figure(data=[go.Sankey(
        # arrangement="fixed",  # no cutoff, but overlap
        arrangement="snap",  # no overlap, but cutoff
        # arrangement = "perpendicular",  # overlap and cutoff (less of both)
        # arrangement="freeform",  # both overlap and cutoff
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            y=[sec.y_center for sec in loggedSections],
            x=(arr([sec.x for sec in loggedSections]) * 1.0).tolist(),
            color=[sec.color for sec in loggedSections]
        ),
        # Links: parent index -> child index. Targets 1..N-1 assume every
        # non-root section has a source, in list order — TODO confirm.
        link=dict(
            source=[sec.source for sec in loggedSections if sec.source is not None],
            target=list(range(1, (len(loggedSections)))),
            value=values
        ))])
    fig.update_layout(
        font_size=20,
    )
    # Serialize to standalone HTML next to the table output.
    html = _get_fig(fig, full_html=True, include_plotlyjs=True)
    File(savetofile).res_pre_ext("_sankey").resrepext('html').write(html)
diagram.save('R410A_logph.svg') # %% exergy analysis ean = ExergyAnalysis(network=nw, E_F=[power, heat_geo], E_P=[heat_cons]) ean.analyse(pamb, Tamb) print("\n##### EXERGY ANALYSIS #####\n") ean.print_results() # create sankey diagram links, nodes = ean.generate_plotly_sankey_input() fig = go.Figure( go.Sankey(arrangement="snap", node={ "label": nodes, 'pad': 11, 'color': 'orange' }, link=links)) plot(fig, filename='R410A_sankey.html') # %% plot exergy destruction # create data for bar chart comps = ['E_F'] E_F = ean.network_data.E_F # top bar E_D = [0] # no exergy destruction in the top bar E_P = [E_F] # add E_F as the top bar for comp in ean.component_data.index: # only plot components with exergy destruction > 1 W