Пример #1
0
def draw_enriched_plot(graph,
                       safe_score,
                       metainfo,
                       fea,
                       _filter_size=0,
                       mode='file',
                       **kwargs):
    """
    Draw the graph edges and overlay the components recorded for one feature.

    The edge trace is taken from the first trace of
    ``vis_progressX(graph, simple=True, mode='obj')``. Every component listed
    in ``metainfo[fea]`` that passes the size filter is then added as its own
    marker trace named ``comps_<index>``. A node is drawn for a component when
    it belongs to that component and its score for ``fea`` is not NaN.

    :param tmap.tda.Graph.Graph graph: graph providing ``nodePos`` and ``size``.
    :param pd.DataFrame safe_score: scores; it is converted with
        ``to_dict(orient='dict')`` and the ``fea`` column is read as a
        ``node id -> score`` mapping.
    :param metainfo: mapping where ``metainfo[fea]`` unpacks into
        ``(enriched_nodes, comps_nodes)``; only ``comps_nodes`` is used here.
    :param fea: feature name; also used as the figure title.
    :param _filter_size: components with ``len(nodes) <= _filter_size`` are
        skipped. A falsy value (default ``0``) disables the filter.
    :param str mode: forwarded to ``write_figure``.
    :param kwargs: forwarded to ``write_figure``.
    :return: the result of ``write_figure(fig, mode, **kwargs)``.
    """
    # Only the component list is needed; unpacking still enforces that
    # metainfo[fea] is a pair.
    _, comps_nodes = metainfo[fea]

    node_pos = graph.nodePos
    sizes = graph.size
    score_table = safe_score.to_dict(orient='dict')

    fig = plotly.tools.make_subplots(1, 1)
    # First trace of the simple figure is the edge (line) trace.
    edge_trace = vis_progressX(graph, simple=True, mode='obj').data[0]
    fig.append_trace(edge_trace, 1, 1)

    for idx, nodes in enumerate(comps_nodes):
        if _filter_size and len(nodes) <= _filter_size:
            continue

        # Select the nodes of this component once (the score must not be NaN)
        # and reuse the selection for position, hover text and marker size.
        selected = [(node, score)
                    for node, score in score_table[fea].items()
                    if node in nodes and not np.isnan(score)]
        selected_ids = [node for node, score in selected]

        component_trace = go.Scatter(
            visible=True,
            x=node_pos[selected_ids, 0],
            y=node_pos[selected_ids, 1],
            hoverinfo="text",
            text=['node:%s,SAFE:%s' % (node, score)
                  for node, score in selected],
            marker=dict(size=[7 + sizes[node] for node in selected_ids],
                        opacity=0.8),
            showlegend=True,
            name='comps_%s' % idx,
            mode="markers")
        fig.append_trace(component_trace, 1, 1)

    fig.layout.font.size = 15
    fig.layout.title = fea
    fig.layout.height = 1500
    fig.layout.width = 1500
    fig.layout.hovermode = 'closest'
    return write_figure(fig, mode, **kwargs)
Пример #2
0
def compare_draw(data,
                 graph,
                 fit_result,
                 safe_scores,
                 fea1,
                 fea2=None,
                 nr_threshold=0.5,
                 mode='obj',
                 **kwargs):
    """
    Draw, for one or two features, the ordination next to the tmap network.

    Layout (it follows the order of the subplot titles):

        * two features: a 2x2 grid, one row per feature, ordination in
          column 1 and tmap network in column 2;
        * one feature: a 2x1 grid, ordination in row 1 and tmap network in
          row 2.

    :param pd.DataFrame data: sample table; every requested feature must be
        one of its columns (it is cast to float to colour the samples).
    :param tmap.tda.Graph.Graph graph: graph whose ``data`` attribute holds the
        projected sample coordinates (first two columns are plotted).
    :param pd.DataFrame fit_result: table indexed by feature with columns
        ``adj_Source``, ``adj_End`` and ``r2``; when a feature is present a
        line from the origin to that point is drawn, labelled with ``r2``
        rounded to 4 digits.
    :param safe_scores: ``safe_scores[fea][node]`` gives the score of a node.
    :param fea1: first feature.
    :param fea2: optional second feature.
    :param nr_threshold: forwarded to ``get_significant_nodes``.
    :param str mode: forwarded to ``write_figure``.
    :param kwargs: forwarded to ``write_figure``.
    :return: the result of ``write_figure(fig, mode, **kwargs)``.
    :raises Exception: if a feature is not a column of ``data``.
    """
    two_features = fea2 is not None
    if two_features:
        col = 2
        subtitles = [
            '%s ordination' % fea1,
            '%s Tmap' % fea1,
            '%s ordination' % fea2,
            '%s Tmap' % fea2
        ]
        feas = [fea1, fea2]
    else:
        col = 1
        subtitles = ['%s ordination' % fea1, '%s Tmap' % fea1]
        feas = [fea1]

    fig = tools.make_subplots(2,
                              col,
                              subplot_titles=subtitles,
                              horizontal_spacing=0,
                              vertical_spacing=0)
    projected_X = graph.data

    def draw_ord_and_tmap(fig, fit_result, fea, row, col, data):
        # Adds the ordination scatter (and the fitted vector, if any) of one
        # feature to the given subplot cell. ``projected_X`` comes from the
        # enclosing scope.
        if fea not in data.columns:
            raise Exception(
                'Error occur, %s seem like a new feature for given data' % fea)
        color = Color(data.loc[:, fea].astype(float), target_by='sample')
        # get_sample_colors() is unpacked as a (colors, category->color) pair
        # by vis_progressX in this file; only the per-sample colours are a
        # valid marker colour.
        sample_colors, _ = color.get_sample_colors()

        fig.append_trace(
            go.Scatter(
                x=projected_X[:, 0],
                y=projected_X[:, 1],
                hoverinfo='text',
                mode='markers',
                marker=dict(color=sample_colors),
                showlegend=False),
            row,
            col)
        if fea in fit_result.index:
            fig.append_trace(
                go.Scatter(x=[0, fit_result.loc[fea, 'adj_Source']],
                           y=[0, fit_result.loc[fea, 'adj_End']],
                           mode='lines+text',
                           showlegend=False,
                           text=['', round(fit_result.loc[fea, 'r2'], 4)]),
                row, col)

    enriched_nodes = get_significant_nodes(graph=graph,
                                           safe_scores=safe_scores,
                                           nr_threshold=nr_threshold)  # todo

    fig_container = []
    for idx, fea in enumerate(feas):
        # The helper takes six arguments; projected_X is read via closure.
        draw_ord_and_tmap(fig, fit_result, fea, idx + 1, 1, data)
        # Nodes that are not significant for this feature are shown with 0.
        cache = {
            node: safe_scores[fea][node] if node in enriched_nodes[fea] else 0
            for node in safe_scores[fea].keys()
        }
        f = vis_progressX(graph,
                          color=Color(cache, target_by='node'),
                          mode='obj',
                          simple=True)
        fig_container.append(f)

    for idx, f in enumerate(fig_container):
        # Two features: tmap sits right of its ordination. One feature: the
        # grid has a single column, so the tmap goes below the ordination.
        if two_features:
            tmap_row, tmap_col = idx + 1, 2
        else:
            tmap_row, tmap_col = 2, 1
        for trace in f.data:
            fig.append_trace(trace, tmap_row, tmap_col)

    fig.layout.width = 2000
    fig.layout.height = 2000
    # Hide the zero lines of the ordination subplots. In the 2x2 grid the
    # second ordination uses xaxis3/yaxis3; the 2x1 grid has no third axis.
    fig.layout.xaxis1.zeroline = False
    fig.layout.yaxis1.zeroline = False
    if two_features:
        fig.layout.xaxis3.zeroline = False
        fig.layout.yaxis3.zeroline = False
    fig.layout.hovermode = 'closest'
    # Hide tick labels on every axis.
    for axis_name in dir(fig.layout):
        if axis_name.startswith('xaxis') or axis_name.startswith('yaxis'):
            fig.layout[axis_name]['showticklabels'] = False
    fig.layout.font.update(dict(size=20))
    # Subplot titles are stored as layout annotations.
    for annotation in fig.layout.annotations:
        annotation['font'].update(dict(size=25))

    return write_figure(fig, mode, **kwargs)
Пример #3
0
def vis_progressX(graph,
                  simple=False,
                  mode='file',
                  color=None,
                  _color_SAFE=None,
                  min_size=10,
                  max_size=40,
                  **kwargs):
    """
    Visualize how a tmap graph is built from the ordination, using ``plotly``.

    By default an interactive figure with a slider is produced which moves the
    samples from their ordination position to the position of their node in
    ``n_step`` (currently fixed to 5) steps. There is no API yet to override
    the number of steps.

    With ``simple=True`` only the network (edges and nodes) is drawn.

    ``mode`` is forwarded to ``write_figure``; the documented choices are
    ``"file"`` (an html file created by plotly), ``"obj"`` (a reusable figure
    object) and ``"web"`` (inline display, e.g. in a notebook).

    Two kinds of colour are involved:

        * ``color`` drives the colour and hover values of samples and nodes.
          It may be ``None`` (red), a colour name, or a
          ``tmap.tda.plot.Color`` built from **any** measurement of nodes or
          samples. **It does not have to be a SAFE score.**

        * ``_color_SAFE``, if given, should be a ``Color`` built from a
          node-length array (normally a SAFE score). It is only used when
          ``simple`` is false, for the network in the lower right subplot.

    :param tmap.tda.Graph.Graph graph:
    :param bool simple: draw only the network instead of the whole process.
    :param str mode: [file|obj|web]
    :param color: ``None``, a colour string or a ``Color`` instance.
    :param _color_SAFE: optional ``Color`` instance for the SAFE network.
    :param min_size: marker size given to the smallest node.
    :param max_size: marker size given to the biggest node.
    :param kwargs: forwarded to ``write_figure``.
    :return: the result of ``write_figure(fig, mode, **kwargs)``.
    :raises Exception: if the node positions have fewer than two columns.
    """
    node_pos = graph.nodePos
    # shape is average projected_data (node x lens)
    sample_pos = graph.data
    # shape is raw projected_data (sample x lens)
    nodes = graph.nodes
    sizes = graph.size

    sample_names = np.array(graph.sample_names.astype(str))
    minmax_scaler = MinMaxScaler(feature_range=(min_size, max_size))
    mms_color = MinMaxScaler(feature_range=[0, 1])

    # NOTE(review): sizes is indexed with 0..len(nodes)-1, i.e. node ids are
    # assumed to be consecutive integers starting at 0 -- confirm.
    scaled_size = minmax_scaler.fit_transform(
        np.array([sizes[_] for _ in range(len(nodes))]).reshape(-1, 1))

    # init some empty values if color wasn't given
    target_v_raw = [0 for _ in nodes]
    target_v = [0 for _ in nodes]
    target_colors = ['blue' for _ in nodes]
    sample_colors = ['red' for _ in sample_names]
    cat2color = defaultdict(lambda: 'blue')
    legend_names = []

    if color is None or isinstance(color, str):
        # From here on ``color`` is never None: it is either a colour string
        # or a Color instance.
        color = 'red' if color is None else color
        color_map = {node_id: color for node_id in graph.nodes}
        target2colors = (np.zeros(
            (len(graph.nodes), 1)), [color] * len(graph.nodes))
    else:
        color_map, target2colors = color.get_colors(graph.nodes)
        if types.is_numeric_dtype(target2colors[0]):
            # values normalized into [0, 1], used to build the colorscale
            target_v = mms_color.fit_transform(target2colors[0]).ravel()
        else:
            target_v = []
        target_v_raw = target2colors[0].ravel()
        target_colors = target2colors[1]

        sample_colors, cat2color = color.get_sample_colors()
        if color.dtype == 'categorical':
            legend_names = target2colors[0][:, 0]

    # For the dynamic process every sample is repeated once per node it
    # belongs to:
    #   * sample_tmp collects the ordination position of each repeated sample
    #   * center_tmp collects the position of the node it moves to
    sample_tmp = []
    center_tmp = []
    text_tmp = []
    duplicated_sample_colors = []
    for n in nodes:
        sample_tmp.append(sample_pos[nodes[n]['sample'], :])
        center_tmp.append(np.repeat(node_pos[[n], :], sizes[n], axis=0))
        text_tmp.append(sample_names[nodes[n]['sample']])
        duplicated_sample_colors += list(
            np.repeat(color_map.get(n, 'blue'), sizes[n]))
    duplicated_sample_pos = np.concatenate(sample_tmp, axis=0)
    duplicated_node_pos = np.concatenate(center_tmp, axis=0)
    duplicated_samples_text = np.concatenate(text_tmp, axis=0)
    # positions, texts and colours of the repeated samples must line up
    assert duplicated_sample_pos.shape[0] == duplicated_node_pos.shape[
        0] == duplicated_samples_text.shape[0] == len(duplicated_sample_colors)

    # prepare edge data
    xs = []
    ys = []
    if node_pos.shape[1] < 2:
        raise Exception(
            "using first two axis as original position, there is only one filter"
        )
    # todo: init some more robust way to draw network
    for edge in graph.edges:
        # the trailing None breaks the line between two edges
        xs += [node_pos[edge[0], 0], node_pos[edge[1], 0], None]
        ys += [node_pos[edge[0], 1], node_pos[edge[1], 1], None]

    # If _color_SAFE is given (and simple is false) two colourings are shown:
    # one based on the original data and one based on the SAFE data.
    if _color_SAFE is not None:
        safe_color, safe_t2c = _color_SAFE.get_colors(graph.nodes)
        # former is a dict which key is node id and values is node color
        # second is a tuple (node values, node color)
        target_SAFE_color = [safe_color[_] for _ in graph.nodes]
        target_SAFE_raw_v = safe_t2c[0].ravel()  # raw node values
    else:
        target_SAFE_color = []
        target_SAFE_raw_v = []

    # prepare node & samples text
    node_text = c_node_text(nodes, sample_names, target_v_raw)
    samples_text = ['sample ID:%s' % _ for _ in sample_names]

    node_line = go.Scatter(
        # edges of the network
        visible=False,
        x=xs,
        y=ys,
        marker=dict(color="#8E9DA2", opacity=0.7),
        line=dict(width=1),
        hoverinfo='skip',
        showlegend=False,
        mode="lines")

    node_marker = go.Scatter(
        # node position
        visible=False,
        x=node_pos[:, 0],
        y=node_pos[:, 1],
        hovertext=node_text,
        hoverinfo="text",
        marker=dict(color=target_colors, size=scaled_size, opacity=1),
        showlegend=False,
        mode="markers")

    sample_marker = go.Scatter(visible=True,
                               x=sample_pos[:, 0],
                               y=sample_pos[:, 1],
                               marker=dict(color=sample_colors),
                               hovertext=samples_text,
                               hoverinfo="text",
                               showlegend=False,
                               mode="markers")
    # All traces are prepared; assemble the figure.
    if simple:
        fig = plotly.tools.make_subplots(1, 1)
        node_line['visible'] = True
        node_marker['visible'] = True
        fig.append_trace(node_line, 1, 1)

        if isinstance(color, str):
            node_marker['marker']['color'] = color
            fig.append_trace(node_marker, 1, 1)
        elif color.dtype == 'numerical':
            # Continuous legend bar: build a colorscale from the normalized
            # node values and their colours.
            nv2c = dict(zip(target_v, target_colors))
            colorscale = [[value, nv2c[value]]
                          for value in sorted(set(target_v))]
            if len(colorscale) == 1:
                # A constant value gives a single stop, but a colorscale
                # needs a stop at 0 and one at 1; repeat the only colour.
                colorscale.append(list(colorscale[0]))
            colorscale[-1][0] = 1  # the last value must be 1
            colorscale[0][0] = 0  # the first value must be 0

            # Use the raw values instead of target_v, otherwise the colour
            # bar would display normalized values which confuse the reader.
            node_marker['marker']['color'] = target2colors[0].ravel()
            node_marker['marker']['colorscale'] = colorscale
            node_marker['marker']['cmin'] = target2colors[0].min()
            node_marker['marker']['cmax'] = target2colors[0].max()
            node_marker['marker']['showscale'] = True
            fig.append_trace(node_marker, 1, 1)
        else:  # if color.dtype == 'categorical'
            # legend_names was filled above for categorical colours;
            # np.unique returns the categories sorted.
            for cat in np.unique(legend_names):
                cat_marker = go.Scatter(
                    # nodes of one category
                    visible=True,
                    x=node_pos[legend_names == cat, 0],
                    y=node_pos[legend_names == cat, 1],
                    text=np.array(node_text)[legend_names == cat],
                    hoverinfo="text",
                    marker=dict(color=cat2color[cat],
                                size=scaled_size[legend_names == cat, 0],
                                opacity=1),
                    name=str(cat),
                    showlegend=True,
                    mode="markers")
                fig.append_trace(cat_marker, 1, 1)
        fig.layout.hovermode = "closest"
    else:
        fig = plotly.tools.make_subplots(
            rows=2,
            cols=2,
            specs=[[{
                'rowspan': 2
            }, {}], [None, {}]],
        )
        # trace 0: samples at their ordination position
        fig.append_trace(sample_marker, 1, 1)

        # traces 1..n_step: samples moving towards their node
        n_step = 5
        for s in range(1, n_step + 1):
            # s = 1: move 1/steps
            # s = steps: move to node position.
            fig.append_trace(
                go.Scatter(visible=False,
                           x=duplicated_sample_pos[:, 0] +
                           ((duplicated_node_pos - duplicated_sample_pos) /
                            n_step * s)[:, 0],
                           y=duplicated_sample_pos[:, 1] +
                           ((duplicated_node_pos - duplicated_sample_pos) /
                            n_step * s)[:, 1],
                           marker=dict(color=duplicated_sample_colors),
                           hoverinfo="text",
                           hovertext=duplicated_samples_text,
                           showlegend=False,
                           mode="markers"), 1, 1)

        # Order is important, do not change the order !!!
        # The slider below addresses the traces by index: the last 5 traces
        # are the two networks (edges + nodes, twice) and the sample plot.
        # The first edge/node pair is appended while still hidden; the
        # traces are switched to visible before being appended again.
        fig.append_trace(node_line, 1, 1)
        fig.append_trace(node_marker, 1, 1)
        node_line['visible'] = True
        node_marker['visible'] = True
        sample_marker['visible'] = True
        fig.append_trace(node_line, 2, 2)
        if _color_SAFE is not None:
            node_text = c_node_text(nodes, sample_names, target_SAFE_raw_v)
            node_marker['hovertext'] = node_text
            node_marker['marker']['color'] = target_SAFE_color
        fig.append_trace(node_marker, 2, 2)
        fig.append_trace(sample_marker, 1, 2)
        ############################################################
        steps = []
        for i in range(n_step + 1):
            # n_step + 6 traces in total; the last three stay visible.
            step = dict(
                method='restyle',
                args=['visible', [False] * (n_step + 3) + [True, True, True]],
            )
            if i >= n_step:
                # final state: show both networks and the sample plot
                step["args"][1][-5:] = [True] * 5
            else:
                step['args'][1][i] = True  # Toggle i'th trace to "visible"
            steps.append(step)

        sliders = [
            dict(active=0,
                 currentvalue={"prefix": "status: "},
                 pad={"t": 20},
                 steps=steps)
        ]
        ############################################################
        layout = dict(
            sliders=sliders,
            width=2000,
            height=1000,
            xaxis1={  # "range": [0, 1],
                "domain": [0, 0.5]
            },
            yaxis1={  # "range": [0, 1],
                "domain": [0, 1]
            },
            xaxis2={  # "range": [0, 1],
                "domain": [0.6, 0.9]
            },
            yaxis2={  # "range": [0, 1],
                "domain": [0.5, 1]
            },
            xaxis3={  # "range": [0, 1],
                "domain": [0.6, 0.9]
            },
            yaxis3={  # "range": [0, 1],
                "domain": [0, 0.5]
            },
            hovermode="closest")
        fig.layout.update(layout)

    return write_figure(fig, mode, **kwargs)