def to_pivot_topic_network(
    self,
    *,
    pivot_key_id: str,
    pivot_key_name: str = "category",
    pivot_key_map: dict[int, str],
    aggregate: str,
    threshold: float,
    topic_labels: "dict[int, str] | None" = None,
) -> "DocumentTopicsCalculator":
    """Reduce ``self.data`` to one weighted edge per (pivot key, topic) pair.

    The ``weight`` column of ``self.data`` is grouped by ``pivot_key_id`` and
    ``topic_id`` and summarised as ``mean`` and ``max``. Rows whose selected
    aggregate does not exceed ``threshold`` are dropped, and the surviving
    aggregate values are passed through ``pu.clamp_values`` with the range
    ``(0.1, 1.0)`` and stored as ``weight``. The result replaces ``self.data``.

    Args:
        pivot_key_id: Name of the column in ``self.data`` to pivot on.
        pivot_key_name: Name of the column that receives the mapped pivot label.
        pivot_key_map: Maps pivot key values to labels; unmapped keys become ``None``.
        aggregate: Which summary column to use, ``'mean'`` or ``'max'``.
        threshold: Exclusive lower bound on the aggregate value.
        topic_labels: Optional mapping that replaces topic ids with labels
            (ids missing from the mapping become ``None``). ``None`` keeps
            the numeric ids. A boolean, the historical default, is treated
            the same as ``None``.

    Returns:
        ``self``, so that calls can be chained. This also holds when no row
        passes the threshold, in which case ``self.data`` is an empty frame
        with the same columns as a non-empty result.

    Raises:
        KeyError: If ``aggregate`` is not one of the summary columns.
    """
    data: pd.DataFrame = self.data

    # Select `weight` before aggregating so unrelated (possibly non-numeric)
    # columns are never summarised, and use string aliases instead of NumPy
    # callables, which pandas has deprecated as aggregation functions.
    network_data: pd.DataFrame = (
        data.groupby([pivot_key_id, 'topic_id'])['weight'].agg(['mean', 'max']).reset_index()
    )
    network_data.columns = [pivot_key_id, 'topic_id', 'mean', 'max']

    # NOTE: reset_index() without drop=True keeps the pre-filter row number in
    # an `index` column; retained because consumers may rely on it.
    network_data = network_data[network_data[aggregate] > threshold].reset_index()

    # Clamping is skipped for an empty selection; every other step below is
    # a no-op on an empty frame, so the output schema stays the same.
    if len(network_data) > 0:
        network_data[aggregate] = pu.clamp_values(list(network_data[aggregate]), (0.1, 1.0))  # type: ignore

    network_data[pivot_key_name] = network_data[pivot_key_id].apply(pivot_key_map.get)
    network_data['weight'] = network_data[aggregate]
    network_data = network_data.drop(columns=['mean', 'max'])

    # Only a real mapping can relabel; the old default `True` has no `.get`.
    if topic_labels is not None and not isinstance(topic_labels, bool):
        network_data['topic_id'] = network_data['topic_id'].apply(topic_labels.get)

    self.data = network_data
    return self
def setup_node_size(nodes, node_size, node_size_range):
    """Resolve the value to use as the node size when drawing.

    When ``node_size`` is ``None`` the first element of ``node_size_range``
    is used instead. If the resulting value is a key of ``nodes`` and a range
    was given, the values stored under that key are passed through
    ``pu.clamp_values`` with ``node_size_range``, written to
    ``nodes['clamped_size']``, and ``'clamped_size'`` is returned. In every
    other case the resolved value is returned unchanged.

    Note that ``nodes`` is modified in place on the clamping path.
    """
    resolved = node_size_range[0] if node_size is None else node_size

    # Nothing to clamp: the size is not a node attribute, or no range given.
    if resolved not in nodes.keys() or node_size_range is None:
        return resolved

    nodes['clamped_size'] = pu.clamp_values(nodes[resolved], node_size_range)
    return 'clamped_size'
def update(self) -> pd.DataFrame:
    """Compute the network data and attach document names as titles.

    The frame returned by ``self.compute()`` has its ``weight`` column passed
    through ``pu.clamp_values`` with the range ``(0.1, 2.0)``. It is then
    joined, index to index, with the ``document_name`` column of
    ``self.inferred_topics.document_index``, after both frames have gone
    through ``pu.set_index(..., columns='document_id')`` (presumably making
    ``document_id`` the index; confirm against that helper). The document
    name is copied into a ``title`` column.

    Returns:
        The merged frame.

    Raises:
        pu.EmptyDataError: If the merged frame has no rows.
    """
    network_data: pd.DataFrame = self.compute()
    network_data["weight"] = pu.clamp_values(list(network_data["weight"]), (0.1, 2.0))

    indexed_documents = pu.set_index(self.inferred_topics.document_index, columns='document_id')
    di: pd.DataFrame = indexed_documents[["document_name"]]

    indexed_network = pu.set_index(network_data, columns='document_id')
    network_data = indexed_network.merge(di, left_index=True, right_index=True)
    network_data["title"] = network_data["document_name"]

    # The merge keeps only index values present in both frames, so the check
    # has to come after it.
    if network_data.shape[0] == 0:
        raise pu.EmptyDataError()

    return network_data
def get_positioned_nodes_as_dict(
    G: nx.Graph, layout: NodesLayout, node_size: str, node_size_range: Optional[Tuple[Number, Number]]
) -> dict:
    """Return positioned node attributes as a dict of plain lists.

    Starts from ``get_positioned_nodes(G, layout)`` and adds up to two
    derived entries:

    * ``clamped_size``: when ``node_size`` names an existing entry and
      ``node_size_range`` is given, that entry's values passed through
      ``clamp_values`` with the range. Subsequent sizing then uses
      ``clamped_size``.
    * ``y_offset``: when the (possibly clamped) size entry exists,
      ``y + size / 2 + 8`` for each node.

    Args:
        G: Graph whose nodes are positioned.
        layout: Layout passed on to ``get_positioned_nodes``.
        node_size: Name of the node attribute that holds the size. If it is
            not present among the node attributes, no derived entries are
            added.
        node_size_range: Range used for clamping, or ``None`` to skip clamping.

    Returns:
        A dict mapping each attribute name to a list of per-node values.
    """
    nodes = get_positioned_nodes(G, layout)

    if node_size in nodes.keys() and node_size_range is not None:
        nodes['clamped_size'] = clamp_values(nodes[node_size], node_size_range)
        node_size = 'clamped_size'

    # The offset can only be derived from a per-node size entry. The previous
    # fallback computed `node_size + 8`, which was never used and raised
    # TypeError for a string `node_size` that is not a node attribute.
    if node_size in nodes.keys():
        nodes['y_offset'] = [y + (r / 2.0 + 8) for (y, r) in zip(nodes['y'], nodes[node_size])]

    return {k: list(nodes[k]) for k in nodes}
def plot(
    network,
    layout,
    scale=1.0,  # pylint: disable=unused-argument
    threshold=0.0,
    node_description=None,
    node_size=5,
    node_size_range=(20, 40),
    weight_scale=5.0,
    normalize_weights=True,
    node_opts=None,
    line_opts=None,
    text_opts=None,
    element_id='nx_id3',
    figsize=(900, 900),
    tools=None,
    palette=DFLT_PALETTE,  # pylint: disable=unused-argument
    **figkwargs,
):
    """Draw a positioned network as a Bokeh figure and return the figure.

    Args:
        network: Graph to draw.
        layout: Node layout passed to ``nu.get_positioned_edges`` and
            ``nu.get_positioned_nodes``.
        scale: Unused.
        threshold: When greater than zero, the graph is first reduced with
            ``nu.get_sub_network(network, threshold)``.
        node_description: Optional series-like object with an ``index``; when
            given, a hover tool is added that uses it as the text source.
        node_size: Either a fixed glyph size, or the name of a node attribute
            that holds per-node sizes.
        node_size_range: Range used to clamp per-node sizes, or ``None`` to
            use them as they are. Only applies when ``node_size`` names a
            node attribute.
        weight_scale: Factor applied to edge weights (after normalisation).
        normalize_weights: Divide edge weights by their maximum.
        node_opts: Overrides for ``DFLT_NODE_OPTS``.
        line_opts: Overrides for ``DFLT_EDGE_OPTS``.
        text_opts: Overrides for the label options.
        element_id: Passed to ``wu.glyph_hover_callback``.
        figsize: ``(width, height)`` of the figure.
        tools: Bokeh tools; defaults to ``TOOLS``.
        palette: Unused.
        **figkwargs: Extra keyword arguments for ``figure``.

    Returns:
        The Bokeh figure.
    """
    if threshold > 0:
        network = nu.get_sub_network(network, threshold)

    edges = nu.get_positioned_edges(network, layout)

    has_weights = 'weight' in edges.keys()

    if normalize_weights and has_weights:
        # A graph without edges has no maximum, and an all-zero maximum
        # cannot be divided by; in both cases the weights are left alone.
        max_weight = max(edges['weight'], default=0)
        if max_weight != 0:
            edges['weight'] = [float(x) / max_weight for x in edges['weight']]

    if weight_scale != 1.0 and has_weights:
        edges['weight'] = [weight_scale * float(x) for x in edges['weight']]

    nodes = nu.get_positioned_nodes(network, layout)

    if node_size in nodes.keys() and node_size_range is not None:
        nodes['clamped_size'] = pu.clamp_values(nodes[node_size], node_size_range)
        node_size = 'clamped_size'

    # With per-node sizes the label offset is a per-node column; with a fixed
    # size it is a single number.
    label_y_offset = 'y_offset' if node_size in nodes.keys() else node_size + 8
    if label_y_offset == 'y_offset':
        nodes['y_offset'] = [y + (r / 2.0 + 8) for (y, r) in zip(nodes['y'], nodes[node_size])]

    edges = {k: list(edges[k]) for k in edges}
    nodes = {k: list(nodes[k]) for k in nodes}

    edges_source: ColumnDataSource = ColumnDataSource(edges)
    nodes_source: ColumnDataSource = ColumnDataSource(nodes)

    node_opts = pu.extend(DFLT_NODE_OPTS, node_opts or {})
    line_opts = pu.extend(DFLT_EDGE_OPTS, line_opts or {})

    p = figure(plot_width=figsize[0], plot_height=figsize[1], tools=tools or TOOLS, **figkwargs)

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # NOTE(review): the line width is bound to a 'weights' column while the
    # scaling above reads and writes 'weight'. Confirm which key
    # nu.get_positioned_edges actually produces.
    _ = p.multi_line(xs='xs', ys='ys', line_width='weights', source=edges_source, **line_opts)  # pylint: disable=too-many-function-args
    r_nodes = p.circle(x='x', y='y', size=node_size, source=nodes_source, **node_opts)  # pylint: disable=too-many-function-args

    if 'fill_color' in nodes.keys():
        r_nodes.glyph.fill_color = 'fill_color'

    if node_description is not None:
        text_source = ColumnDataSource(dict(text_id=node_description.index, text=node_description))
        p.add_tools(
            bokeh.models.HoverTool(
                renderers=[r_nodes],
                tooltips=None,
                callback=wu.glyph_hover_callback(nodes_source, 'node_id', text_source, element_id=element_id),
            )
        )

    # Later entries win: caller-supplied text_opts override everything else.
    label_opts = {
        **DFLT_TEXT_OPTS,
        **dict(y_offset=label_y_offset, text_color='black', text_baseline='bottom'),
        **(text_opts or {}),
    }

    p.add_layout(bokeh.models.LabelSet(source=nodes_source, **label_opts))

    return p