示例#1
0
    def to_pivot_topic_network(
        self,
        *,
        pivot_key_id: str,
        pivot_key_name: str = "category",
        pivot_key_map: dict[int, str],
        aggregate: str,
        threshold: float,
        topic_labels: dict[int, str] = None,
    ) -> DocumentTopicsCalculator:
        """Replace `self.data` with a pivot-key/topic edge list weighted by aggregated `weight`.

        Rows of `self.data` are grouped by (`pivot_key_id`, `topic_id`) and the mean
        and max of `weight` are computed per group. Groups whose `aggregate` value is
        not strictly greater than `threshold` are discarded.

        Args:
            pivot_key_id: Name of the column in `self.data` to pivot on.
            pivot_key_name: Name of the column that receives the mapped pivot value.
            pivot_key_map: Maps pivot id to display value; ids missing from the map
                yield `None` (plain `dict.get` lookup).
            aggregate: Column used both for filtering and as the resulting weight;
                expected to be `'mean'` or `'max'`.
            threshold: Exclusive lower bound for the aggregated weight.
            topic_labels: Optional mapping from topic id to label. When given, the
                `topic_id` column is replaced by the looked-up labels. The default is
                `None` (no relabelling); the previous default `True` always failed
                on `True.get`.

        Returns:
            `self`, with `self.data` replaced by the network data. When no group
            passes the threshold, the empty frame itself is returned and `self.data`
            is left untouched (see the note in the body).
        """

        data: pd.DataFrame = self.data  # .set_index('document_id')

        # Select `weight` before aggregating so that no other column is aggregated
        # (cheaper, and robust against non-numeric columns in `data`).
        network_data: pd.DataFrame = (
            data.groupby([pivot_key_id, 'topic_id'])['weight'].agg(['mean', 'max']).reset_index()
        )
        network_data.columns = [pivot_key_id, 'topic_id', 'mean', 'max']
        # `reset_index()` without `drop=True` keeps the pre-filter row labels in an
        # `index` column; retained because downstream consumers may read it.
        network_data = network_data[(network_data[aggregate] > threshold)].reset_index()

        if len(network_data) == 0:
            # NOTE(review): this returns a DataFrame, not `self`, which contradicts
            # the declared return type. Kept as-is because callers may rely on it
            # to detect the empty case - confirm before changing.
            return network_data

        network_data[aggregate] = pu.clamp_values(list(network_data[aggregate]), (0.1, 1.0))  # type: ignore
        network_data[pivot_key_name] = network_data[pivot_key_id].apply(pivot_key_map.get)
        network_data['weight'] = network_data[aggregate]
        network_data.drop(columns=['mean', 'max'], inplace=True)

        if topic_labels is not None:
            network_data['topic_id'] = network_data['topic_id'].apply(topic_labels.get)

        self.data = network_data

        return self
示例#2
0
文件: plot.py 项目: humlab/penelope
def setup_node_size(nodes, node_size, node_size_range):
    """Resolve the node-size specification used when drawing nodes.

    Args:
        nodes: Mapping of node attribute name to a sequence of per-node values.
            Gains a `'clamped_size'` entry when clamping is applied.
        node_size: Either a key of `nodes`, some other value that is passed
            through unchanged, or `None`.
        node_size_range: Two-element range handed to `pu.clamp_values`, or
            `None` to disable both defaulting and clamping.

    Returns:
        `'clamped_size'` when `node_size` names an entry of `nodes` and a range
        is given; otherwise `node_size` itself, where a `None` size falls back
        to the first element of `node_size_range` if a range is available.
    """

    if node_size_range is None:
        # Without a range there is nothing to default to and nothing to clamp
        # against; previously a `None` size crashed here on `None[0]`.
        return node_size

    if node_size is None:
        node_size = node_size_range[0]

    if node_size in nodes.keys():
        nodes['clamped_size'] = pu.clamp_values(nodes[node_size], node_size_range)
        node_size = 'clamped_size'

    return node_size
    def update(self) -> pd.DataFrame:
        """Compute the network data, clamp its weights and attach document names.

        The frame returned by `self.compute()` gets its `weight` column passed
        through `pu.clamp_values` with the range (0.1, 2.0). It is then indexed by
        `document_id` and joined (index on index) with the `document_name` column
        of the inferred topics' document index; `title` is a copy of that name.

        Returns:
            The merged frame.

        Raises:
            pu.EmptyDataError: If the merged frame has no rows.
        """

        frame: pd.DataFrame = self.compute()
        clamped_weights = pu.clamp_values(list(frame["weight"]), (0.1, 2.0))
        frame["weight"] = clamped_weights

        # Only the document name is needed from the document index.
        document_index = self.inferred_topics.document_index
        names: pd.DataFrame = document_index.pipe(pu.set_index, columns='document_id')[["document_name"]]

        indexed: pd.DataFrame = frame.pipe(pu.set_index, columns='document_id')
        merged: pd.DataFrame = indexed.merge(names, left_index=True, right_index=True)
        merged["title"] = merged["document_name"]

        if len(merged) == 0:
            raise pu.EmptyDataError()

        return merged
示例#4
0
def get_positioned_nodes_as_dict(
    G: nx.Graph, layout: NodesLayout, node_size: str, node_size_range: Optional[Tuple[Number, Number]]
) -> dict:
    """Return the positioned nodes of `G` as a dict of plain lists.

    Args:
        G: Graph whose nodes are positioned via `get_positioned_nodes`.
        layout: Layout passed on to `get_positioned_nodes`.
        node_size: Name of the node attribute holding per-node sizes. A value
            that is not a key of the node data leaves the result without
            `clamped_size` and `y_offset` entries.
        node_size_range: Range handed to `clamp_values`, or `None` to use the
            raw sizes.

    Returns:
        Dict mapping attribute name to a list of per-node values. When sizes are
        available it additionally holds `y_offset` (`y + size / 2 + 8`) and,
        if a range was given, `clamped_size`.
    """

    nodes = get_positioned_nodes(G, layout)

    if node_size in nodes.keys() and node_size_range is not None:
        nodes['clamped_size'] = clamp_values(nodes[node_size], node_size_range)
        node_size = 'clamped_size'

    # Test membership directly. The former `node_size + 8` fallback was computed
    # only to be discarded, and raised TypeError for a string `node_size` that is
    # not a key of `nodes`.
    if node_size in nodes.keys():
        nodes['y_offset'] = [y + (size / 2.0 + 8) for y, size in zip(nodes['y'], nodes[node_size])]

    return {k: list(nodes[k]) for k in nodes}
示例#5
0
文件: plot.py 项目: humlab/penelope
def plot(  # pylint: disable=W0102
    network,
    layout,
    scale=1.0,  # pylint: disable=unused-argument
    threshold=0.0,
    node_description=None,
    node_size=5,
    node_size_range=[20, 40],
    weight_scale=5.0,
    normalize_weights=True,
    node_opts=None,
    line_opts=None,
    text_opts=None,
    element_id='nx_id3',
    figsize=(900, 900),
    tools=None,
    palette=DFLT_PALETTE,  # pylint: disable=unused-argument
    **figkwargs,
):
    """Render `network` as a Bokeh figure with edges, nodes and node labels.

    Args:
        network: Graph to draw.
        layout: Node layout passed to `nu.get_positioned_edges` and
            `nu.get_positioned_nodes`.
        scale: Unused.
        threshold: When greater than 0, the network is first reduced with
            `nu.get_sub_network(network, threshold)`.
        node_description: Optional series-like object (must expose `.index`);
            when given, a hover tool is added that uses it as text source.
        node_size: Either a key of the node data (per-node sizes) or a number
            used as a fixed size for all nodes.
        node_size_range: Range used to clamp per-node sizes; `None` disables
            clamping. The default list is never mutated here.
        weight_scale: Factor applied to edge weights (after normalisation).
        normalize_weights: When true, edge weights are divided by the largest
            weight.
        node_opts: Overrides for `DFLT_NODE_OPTS`.
        line_opts: Overrides for `DFLT_EDGE_OPTS`.
        text_opts: Overrides for the label options.
        element_id: Passed on to `wu.glyph_hover_callback`.
        figsize: (width, height) of the figure.
        tools: Bokeh tools; falls back to `TOOLS`.
        palette: Unused.
        **figkwargs: Extra keyword arguments for `figure`.

    Returns:
        The Bokeh figure.
    """
    if threshold > 0:
        network = nu.get_sub_network(network, threshold)

    edges = nu.get_positioned_edges(network, layout)

    if normalize_weights and 'weight' in edges.keys():
        weights = [float(x) for x in edges['weight']]
        # `default` covers a network without edges; the zero check covers
        # all-zero weights. Both previously raised
        # (ValueError / ZeroDivisionError).
        max_weight = max(weights, default=0.0)
        if max_weight != 0:
            weights = [w / max_weight for w in weights]
        edges['weight'] = weights

    if weight_scale != 1.0 and 'weight' in edges.keys():
        edges['weight'] = [weight_scale * float(x) for x in edges['weight']]

    # edges = dict(source=u, target=v, xs=xs, ys=ys, weights=weights)

    nodes = nu.get_positioned_nodes(network, layout)

    # node_size = setup_node_size(nodes, node_size, node_size_range)
    if node_size in nodes.keys() and node_size_range is not None:
        nodes['clamped_size'] = pu.clamp_values(nodes[node_size],
                                                node_size_range)
        node_size = 'clamped_size'

    # Per-node sizes get a per-node label offset column; a fixed numeric size
    # gets a fixed numeric offset.
    label_y_offset = 'y_offset' if node_size in nodes.keys() else node_size + 8
    if label_y_offset == 'y_offset':
        nodes['y_offset'] = [
            y + r
            for (y,
                 r) in zip(nodes['y'], [r / 2.0 + 8 for r in nodes[node_size]])
        ]

    edges = {k: list(edges[k]) for k in edges}
    nodes = {k: list(nodes[k]) for k in nodes}

    edges_source: ColumnDataSource = ColumnDataSource(edges)
    nodes_source: ColumnDataSource = ColumnDataSource(nodes)

    node_opts = pu.extend(DFLT_NODE_OPTS, node_opts or {})
    line_opts = pu.extend(DFLT_EDGE_OPTS, line_opts or {})

    p = figure(plot_width=figsize[0],
               plot_height=figsize[1],
               tools=tools or TOOLS,
               **figkwargs)

    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None

    # NOTE(review): the weights above are stored under 'weight', but the line
    # width is bound to the column 'weights' - confirm which key
    # `nu.get_positioned_edges` actually returns.
    _ = p.multi_line(xs='xs',
                     ys='ys',
                     line_width='weights',
                     source=edges_source,
                     **line_opts)  # pylint: disable=too-many-function-args
    r_nodes = p.circle(x='x',
                       y='y',
                       size=node_size,
                       source=nodes_source,
                       **node_opts)  # pylint: disable=too-many-function-args

    if 'fill_color' in nodes.keys():
        r_nodes.glyph.fill_color = 'fill_color'

    if node_description is not None:
        text_source = ColumnDataSource(
            dict(text_id=node_description.index, text=node_description))
        p.add_tools(
            bokeh.models.HoverTool(
                renderers=[r_nodes],
                tooltips=None,
                callback=wu.glyph_hover_callback(nodes_source,
                                                 'node_id',
                                                 text_source,
                                                 element_id=element_id),
            ))

    # Later entries win: defaults < computed label placement < caller overrides.
    label_opts = {
        **DFLT_TEXT_OPTS,
        **dict(y_offset=label_y_offset,
               text_color='black',
               text_baseline='bottom'),
        **(text_opts or {}),
    }

    # if label_opts.get('text'):
    #     nodes_source.data[label_opts.get('text')] = [ str(x) for x in nodes_source.data[label_opts.get('text')] ]

    p.add_layout(bokeh.models.LabelSet(source=nodes_source, **label_opts))

    return p