def test_valid_colorscale():
    """Check custom and default node colorscales of the static plot.

    A custom ``"blues"`` colorscale must yield the same node-trace
    colorscale in 2D and in 3D, while the colorscale obtained without any
    ``plotly_params`` must equal ``viridis_colorscale`` and differ from the
    custom one.
    """
    pipe = make_mapper_pipeline()

    def node_colorscale(figure):
        # ``figure.data[1]`` is the trace whose marker colorscale is checked.
        return figure.data[1]["marker"]["colorscale"]

    # A fresh ``plotly_params`` dict is built for every call.
    custom_figs = {
        dim: plot_static_mapper_graph(
            pipe, X, layout_dim=dim,
            plotly_params={"node_trace": {"marker_colorscale": "blues"}}
        )
        for dim in (2, 3)
    }

    # The custom colorscale must be applied identically in 2d and in 3d
    custom_2d = node_colorscale(custom_figs[2])
    custom_3d = node_colorscale(custom_figs[3])
    assert custom_2d == custom_3d

    # The default colorscale is "viridis" and the custom one differs from it
    default_colorscale = node_colorscale(plot_static_mapper_graph(pipe, X))
    assert default_colorscale == viridis_colorscale
    assert custom_2d != default_colorscale
def test_invalid_color_features_as_array_of_indices():
    """An ndarray of column indices as `color_features` raises ValueError."""
    pipe = make_mapper_pipeline()
    with pytest.raises(ValueError):
        column_indices = np.arange(X_arr.shape[1])
        plot_static_mapper_graph(
            pipe,
            X_arr,
            color_data=X_arr,
            color_features=column_indices,
        )
def test_node_color_statistic_as_ndarray_wrong_length():
    """An ndarray statistic with one entry too many raises ValueError."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    # One more entry than there are nodes in the fitted graph.
    oversized_statistic = np.arange(n_nodes + 1)
    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            pipe, X_arr, node_color_statistic=oversized_statistic
        )
def test_colors_same_2d_3d(X, color_variable, node_color_statistic):
    """Node colors must not depend on the layout dimension.

    Plots the same pipeline and data with ``layout_dim=2`` and
    ``layout_dim=3`` and checks that the marker colors of ``data[1]`` agree.

    Parameters
    ----------
    X : fixture
        Data passed to ``plot_static_mapper_graph``.
    color_variable : fixture
        Forwarded as ``color_variable`` to both plots.
    node_color_statistic : fixture
        Forwarded as ``node_color_statistic`` to both plots.
    """
    pipe = make_mapper_pipeline()
    fig_2d = plot_static_mapper_graph(
        pipe, X, layout_dim=2, color_variable=color_variable,
        node_color_statistic=node_color_statistic
    )
    fig_3d = plot_static_mapper_graph(
        pipe, X, layout_dim=3, color_variable=color_variable,
        node_color_statistic=node_color_statistic
    )
    # ``==`` between array-valued colors is elementwise and cannot be used
    # directly in an ``assert``; ``np.array_equal`` handles both tuples and
    # ndarrays and yields a single boolean.
    assert np.array_equal(fig_2d.data[1].marker.color,
                          fig_3d.data[1].marker.color)
def test_colors_same_2d_3d(X, color_data, node_color_statistic):
    """Marker colors of ``data[1]`` must be equal for 2D and 3D layouts."""
    pipe = make_mapper_pipeline()
    colors_per_dim = []
    for dim in (2, 3):
        figure = plot_static_mapper_graph(
            pipe,
            X,
            layout_dim=dim,
            color_data=color_data,
            node_color_statistic=node_color_statistic,
        )
        colors_per_dim.append(figure.data[1].marker.color)

    colors_2d, colors_3d = colors_per_dim
    assert np.array_equal(colors_2d, colors_3d)
def test_column_dropdown(X, columns, layout_dim):
    """Dropdown buttons must carry the colors of the matching column plot.

    The initially displayed colors must equal those stored in the first
    button, and the i-th button must store the colors obtained by plotting
    with ``color_features`` set to the i-th entry of ``columns``.
    """
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(
        pipe, X, color_data=X, layout_dim=layout_dim
    )
    buttons = fig.layout.updatemenus[0].buttons

    def displayed_colors(figure):
        return list(figure.data[1].marker.color)

    def button_colors(position):
        return list(buttons[position].args[0]["marker.color"][1])

    assert displayed_colors(fig) == button_colors(0)

    for position, column in enumerate(columns):
        single_column_fig = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_data=X,
            color_features=column
        )
        assert displayed_colors(single_column_fig) == button_colors(position)
def test_valid_layout_dim(X, layout_dim):
    """``data[0]`` has ``x`` and ``y``, and ``z`` only if ``layout_dim`` is 3."""
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(pipe, X, layout_dim=layout_dim)
    edge_trace = fig.data[0]

    has_x = hasattr(edge_trace, "x")
    assert has_x and hasattr(edge_trace, "y")

    has_z = hasattr(edge_trace, "z")
    if layout_dim == 3:
        assert has_z
    else:
        assert not has_z
def test_valid_hoverlabel_bgcolor(X, layout_dim):
    """A custom ``hoverlabel_bgcolor`` must show up on ``data[1]``."""
    pipe = make_mapper_pipeline()
    node_trace_params = {"hoverlabel_bgcolor": "white"}
    fig = plot_static_mapper_graph(
        pipe,
        X,
        layout_dim=layout_dim,
        plotly_params={"node_trace": node_trace_params},
    )
    hoverlabel = fig.data[1]["hoverlabel"]
    assert hoverlabel["bgcolor"] == "white"
def test_unsuitable_colorscale_for_hoverlabel_3d(X):
    """Plotting in 3D with ``hsl_colorscale`` must emit a RuntimeWarning."""
    pipe = make_mapper_pipeline()
    plotly_params = {"node_trace": {"marker_colorscale": hsl_colorscale}}
    with pytest.warns(RuntimeWarning):
        plot_static_mapper_graph(
            pipe, X, layout_dim=3, plotly_params=plotly_params
        )
def test_color_by_column_dropdown_2d(layout_dim):
    """Dropdown buttons must store the colors of the per-column plots.

    The first button must hold the initially displayed colors; button
    ``i + 1`` must hold the colors obtained with ``color_variable=i``.
    """
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(
        pipe, X, layout_dim=layout_dim, color_by_columns_dropdown=True
    )
    buttons = fig.layout.updatemenus[0].buttons

    def stored_colors(button):
        return list(button.args[0]["marker.color"][1])

    assert list(fig.data[1].marker.color) == stored_colors(buttons[0])

    n_columns = X.shape[1]
    for column in range(n_columns):
        column_fig = plot_static_mapper_graph(
            pipe, X, layout_dim=layout_dim, color_variable=column
        )
        # Buttons for the individual columns start at position 1.
        expected = stored_colors(buttons[column + 1])
        assert list(column_fig.data[1].marker.color) == expected
def test_node_color_statistic_as_ndarray():
    """An ndarray statistic, one entry per node, becomes the node colors."""
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    per_node_values = np.arange(n_nodes)
    fig = plot_static_mapper_graph(
        pipe, X_arr, node_color_statistic=per_node_values
    )
    plotted_colors = fig.data[1].marker.color
    assert np.array_equal(plotted_colors, per_node_values)
def test_cluster_sizes(self):
    """Verify that the node sizes shown in the hovertext of the plot add up
    to the total size of the node elements computed by the pipeline."""
    pipe = make_mapper_pipeline(clusterer=FirstSimpleGap())
    fig = plot_static_mapper_graph(pipe, X_arr)

    # Sizes as displayed: parsed back from the hovertext of ``data[1]``.
    displayed_total = sum(
        _get_size_from_hovertext(text) for text in fig.data[1].hovertext
    )

    # Sizes as computed: number of elements in each node of the graph.
    graph = pipe.fit_transform(X_arr)
    computed_total = sum(
        len(elements) for elements in graph.vs['node_elements']
    )

    assert displayed_total == computed_total
def test_node_color_statistic_as_ndarray(is_2d):
    """An ndarray statistic (1D or two-column) sets the node colors.

    When ``is_2d`` is true the statistic has two identical columns; in both
    cases the plotted colors must equal the values of the first column.
    """
    pipe = make_mapper_pipeline()
    n_nodes = len(pipe.fit_transform(X_arr).vs)
    first_column = np.arange(n_nodes)

    statistic = first_column
    if is_2d:
        statistic = np.vstack([first_column, first_column]).T

    fig = plot_static_mapper_graph(
        pipe, X_arr, node_color_statistic=statistic
    )
    assert np.array_equal(fig.data[1].marker.color, first_column)
def test_is_data_present(self):
    """Verify that what we see in the graph corresponds to the number of
    samples in the graph.

    The number of plotted node positions must not exceed the number of
    samples in ``X``, and there must be exactly one color per plotted node.
    """
    pipe = make_mapper_pipeline()
    # Scope the "ignore" filter to this test only: a bare
    # ``warnings.simplefilter`` modifies the process-wide filter list and
    # would silence warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_static_mapper_graph(pipe, X, color_variable=colors,
                                       clone_pipeline=False)
        node_trace = fig.get_state()['_data'][1]
        xy = np.stack([node_trace[c] for c in ['x', 'y']]).transpose()
        # Compare along the first axis only; comparing the full shape tuples
        # is lexicographic and also depends on the number of columns.
        assert X.shape[0] >= xy.shape[0]
        real_colors = node_trace['marker']['color']
        assert len(real_colors) == xy.shape[0]
def test_color_features_as_estimator_or_callable(color_features):
    """Node colors must match per-node means of the first PCA component.

    The expected colors are computed here by fitting a two-component PCA on
    ``X_arr`` and averaging its first output column over each node's
    elements.
    """
    pipe = make_mapper_pipeline()
    node_elements = pipe.fit_transform(X_arr).vs["node_elements"]

    projected = PCA(n_components=2).fit_transform(X_arr)
    expected_colors = np.array(
        [np.mean(projected[members, 0]) for members in node_elements]
    )

    fig = plot_static_mapper_graph(
        pipe, X_arr, color_data=X_arr, color_features=color_features
    )
    assert_almost_equal(fig.data[1].marker.color, expected_colors)
def test_is_data_present(self):
    """Verify that the plotted nodes are consistent with the input data:
    equally many x and y coordinates, no more nodes than samples, and one
    color per node."""
    pipe = make_mapper_pipeline()
    fig = plot_static_mapper_graph(
        pipe, X_arr, color_data=colors, clone_pipeline=False
    )
    node_trace = fig.data[1]
    xs, ys = node_trace.x, node_trace.y

    num_nodes = xs.shape[0]
    assert num_nodes == ys.shape[0]
    assert len(X_arr) >= num_nodes
    assert len(node_trace.marker.color) == num_nodes
def test_interactive_plotter_attrs(X, color_data, layout_dim):
    """Simple tests on the attributes stored by MapperInteractivePlotter
    after calling its ``plot`` method."""
    pipe = make_mapper_pipeline()
    plotter = MapperInteractivePlotter(pipe, X)
    plotter.plot(color_data=color_data, layout_dim=layout_dim)

    # 1 graph_ must be isomorphic to the graph computed by the pipeline
    expected_graph = pipe.fit_transform(X)
    assert plotter.graph_.isomorphic(expected_graph)

    # 2 pipeline_ must have the same string representation as the input
    assert str(plotter.pipeline_) == str(pipe)

    # 3 color_features_: the passed color data, or a column of sample
    # indices when no color data was passed
    if color_data is None:
        expected_color_features = np.arange(len(X)).reshape(-1, 1)
    else:
        expected_color_features = color_data
    assert np.array_equal(plotter.color_features_, expected_color_features)

    # 4 node_summaries_ must have one entry per node
    assert len(plotter.node_summaries_) == len(expected_graph.vs)

    # 5 figure_ must agree with the static plot on the attributes below
    static_fig = plot_static_mapper_graph(
        pipe, X, color_data=color_data, layout_dim=layout_dim
    )
    interactive_fig = plotter.figure_

    def assert_same_attrs(interactive_obj, static_obj, attr_names):
        for attr_name in attr_names:
            assert np.all(
                getattr(interactive_obj, attr_name)
                == getattr(static_obj, attr_name)
            )

    assert_same_attrs(
        interactive_fig.data[0], static_fig.data[0],
        ["hoverinfo", "line", "name", "x", "y"]
    )
    # The marker is excluded here and compared attribute by attribute below
    assert_same_attrs(
        interactive_fig.data[1], static_fig.data[1],
        ["hoverinfo", "hovertext", "mode", "name", "x", "y"]
    )
    assert_same_attrs(
        interactive_fig.data[1].marker, static_fig.data[1].marker,
        ["color", "colorbar", "colorscale", "line", "opacity",
         "reversescale", "showscale", "size", "sizemin", "sizemode",
         "sizeref"]
    )
def test_is_data_present(self):
    """Verify that what we see in the graph corresponds to the number of
    samples in the graph.

    Reads the state of ``data[1]`` via ``fig.get_state()`` and checks that
    the x and y entries report the same leading ``"shape"`` value, that
    this value does not exceed ``len(X)``, and that there is one color per
    node.
    """
    pipe = make_mapper_pipeline()
    # Scope the "ignore" filter to this test only: a bare
    # ``warnings.simplefilter`` modifies the process-wide filter list and
    # would silence warnings in every test that runs afterwards.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fig = plot_static_mapper_graph(pipe, X, color_variable=colors,
                                       clone_pipeline=False)
        node_trace = fig.get_state()['_data'][1]
        node_trace_x = node_trace["x"]
        node_trace_y = node_trace["y"]
        assert node_trace_x["shape"][0] == node_trace_y["shape"][0]
        num_nodes = node_trace_x["shape"][0]
        assert len(X) >= num_nodes
        fig_colors = node_trace['marker']['color']
        assert len(fig_colors) == num_nodes
def test_invalid_layout_algorithm(X):
    """An unknown ``layout`` name must make the plot call raise KeyError."""
    # Build the pipeline outside ``pytest.raises`` so that only the call
    # under test can satisfy the expected exception; an error raised during
    # setup must fail the test instead of passing it.
    pipe = make_mapper_pipeline()
    with pytest.raises(KeyError):
        plot_static_mapper_graph(pipe, X, layout="foobar")
def test_invalid_layout_dim(X, layout_dim):
    """An invalid ``layout_dim`` must make the plot call raise ValueError."""
    # Build the pipeline outside ``pytest.raises`` so that only the call
    # under test can satisfy the expected exception; an error raised during
    # setup must fail the test instead of passing it.
    pipe = make_mapper_pipeline()
    with pytest.raises(ValueError):
        plot_static_mapper_graph(pipe, X, layout_dim=layout_dim)
def get_region_plot(pipe, data, layout, node_elements, colorscale):
    '''Generate a figure of the mapper graph colored by identified regions

    Parameters
    ----------
    pipe : MapperPipeline
        The Mapper pipeline to compute the mapper-graph
    data : ndarray (n_samples x n_dim)
        Data used for mapper
    layout : igraph.layout.Layout
        Layout of graph
    node_elements : tuple
        Tuple of arrays where the array at position x contains the data
        points for node x. Only its length is used here.
    colorscale : list
        List of colors to use for each region, indexed by region number

    Returns
    -------
    fig : figure object
        The figure returned by ``plot_static_mapper_graph``, with the marker
        color of its second trace overwritten by the region colors.
    '''
    regions = utils.get_regions()

    # Pair every node of a region with that region's color, order the pairs
    # by node, and keep a single color per node in that order.
    node_color_pairs = sorted(
        itertools.chain.from_iterable(
            zip(regions[region], itertools.repeat(colorscale[region]))
            for region in range(len(regions))
        )
    )
    color_by_node = collections.OrderedDict(node_color_pairs)
    node_color = np.array(list(color_by_node.values()))

    # Uniform node size, hidden marker color scale, region colors for both
    # the hover label and the marker.
    uniform_sizes = [1] * len(node_elements)
    plotly_kwargs = {
        'node_trace_marker_size': uniform_sizes,
        'node_trace_marker_showscale': False,
        'node_trace_hoverlabel': node_color,
        'node_trace_marker_color': node_color
    }

    fig = plot_static_mapper_graph(
        pipe,
        data,
        layout,
        layout_dim=2,
        color_by_columns_dropdown=False,
        plotly_kwargs=plotly_kwargs
    )

    # Write the colors directly into the figure data
    # (hack around with the new api).
    node_marker = fig._data[1]['marker']
    node_marker['color'] = node_color
    return fig
def test_color_features_as_columns_fails_on_series():
    """A column name as `color_features` is rejected for a series.

    Passing a single column of ``X_df`` as ``color_data`` together with a
    column name must raise a ValueError with the expected message.
    """
    pipe = make_mapper_pipeline()
    expected_message = "If `color_data` is a pandas series"
    with pytest.raises(ValueError, match=expected_message):
        plot_static_mapper_graph(
            pipe,
            X_df,
            color_data=X_df["a"],
            color_features="a",
        )
def main():
    """Run PCA on the flattened patch images and export three figures.

    Paths are read from ``DOTENV_KEY2VAL``. The function writes, into the
    ``GEN_FIGURES_DIR`` location:

    * ``elbow_plot.png`` -- explained variance ratio of a 440-component PCA,
    * ``mapper_2_dimensional_reduction.html`` -- a 3D Mapper graph built on
      the 3-component PCA projection with one-hot diagnosis labels appended,
    * ``scatterplot_pca_3d.html`` -- a 3D scatter plot of that projection.
    """
    data_dir = DOTENV_KEY2VAL["DATA_DIR"]
    patch_dir = data_dir + "/patch_92/"
    diagnosis_path = data_dir + "collected_diagnoses_complete.json"

    cn_patients, mci_patients, ad_patients = (
        utils.get_earliest_available_diagnosis(diagnosis_path)
    )
    raw_images = utils.get_arrays_from_dir(
        patch_dir, cn_patients + mci_patients + ad_patients
    )

    # Diagnosis codes: 1 = CN, 2 = MCI, 3 = AD. One is subtracted from the
    # CN count because the MRI of one CN patient was not found.
    diagnosis_codes = (
        [1] * (len(cn_patients) - 1)
        + [2] * len(mci_patients)
        + [3] * len(ad_patients)
    )
    diags = np.array(diagnosis_codes).reshape(-1, 1)
    labels = OneHotEncoder().fit_transform(diags).toarray()

    flat_images = np.asarray([image.flatten() for image in raw_images])

    # Elbow plot of the explained variance ratio per component
    elbow_pca = PCA(n_components=440)
    elbow_pca.fit(flat_images)
    fig, ax0 = plt.subplots(nrows=1, sharex=True, figsize=(6, 6))
    component_numbers = np.arange(1, elbow_pca.n_components_ + 1)
    ax0.plot(
        component_numbers,
        elbow_pca.explained_variance_ratio_,
        "+",
        linewidth=2,
    )
    ax0.set_ylabel("PCA explained variance ratio")
    ax0.legend(prop={"size": 12})
    plt.savefig(DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + "elbow_plot.png")

    # Project onto three components and append the one-hot labels
    projection = PCA(n_components=3).fit_transform(flat_images)
    projection_with_labels = np.append(projection, labels, axis=1)

    mapper_pipeline = make_mapper_pipeline(
        filter_func=Projection(columns=list(range(2))),
        cover=CubicalCover(n_intervals=10, overlap_frac=0.25),
        clusterer=DBSCAN(eps=0.5, min_samples=5),
        verbose=True,
        n_jobs=4,
    )
    mapper_fig = plot_static_mapper_graph(
        mapper_pipeline,
        projection_with_labels,
        layout_dim=3,
        color_by_columns_dropdown=True,
        plotly_params={"node_trace": {"marker_colorscale": "Blues"}},
    )
    mapper_fig.write_html(
        DOTENV_KEY2VAL["GEN_FIGURES_DIR"]
        + "mapper_2_dimensional_reduction.html"
    )

    scatter_fig = px.scatter_3d(
        pd.DataFrame(projection_with_labels),
        x=0,
        y=1,
        z=2,
        color=3,
        title="3D scatterplot of the PCA of the image data",
    )
    scatter_fig.write_html(
        DOTENV_KEY2VAL["GEN_FIGURES_DIR"] + "scatterplot_pca_3d.html"
    )
def test_valid_color_data_transformed(color_data, color_features):
    """Smoke test: plotting must not raise when pandas dataframes/series
    are passed as color_data and/or returned when applying color_features.
    """
    pipe = make_mapper_pipeline()
    plot_static_mapper_graph(
        pipe,
        X_arr,
        color_data=color_data,
        color_features=color_features,
    )
def get_graph_plot_colored_by_election_results(pipeline, year, df, data,
                                               keep_layout):
    '''Plot the mapper graph with nodes colored by election results

    Each node is colored by the percentage of its electors that belong to
    rows whose ``winner`` value is non-zero, and sized by the value returned
    by ``utils.get_n_electors`` for that node.

    Parameters
    ----------
    pipeline : MapperPipeline
        The Mapper pipeline to compute the mapper-graph
    year : np.int
        Color by election results from year `year`
    df : pandas data frame
        Data frame with columns 'year', 'winner' and 'n_electors'
    data : ndarray (n_samples x n_dim)
        Data used for mapper
    keep_layout : list of two dicts or None
        If not None, ``keep_layout[0]`` is applied to the first trace and
        ``keep_layout[1]`` to the second trace of the figure, with keys
        'x', 'y' whose values are 1d arrays of positions.

    Returns
    -------
    fig : figure object
        The figure returned by ``plot_static_mapper_graph``.
    '''
    node_elements = pipeline.fit_transform(
        data)['node_metadata']['node_elements']

    # Select the rows of the requested election once instead of re-filtering
    # the data frame for every use.
    df_year = df[df['year'] == year]
    n_electors = df_year['n_electors'].values
    electors_won = df_year['winner'].values * n_electors

    # set node color to percentage of number of electors won by republicans
    node_color = np.array([
        100 * electors_won[x].sum() / n_electors[x].sum()
        for x in node_elements
    ])

    # Electors per node; used for the hover text, the marker size and the
    # marker size reference, so it is computed a single time.
    node_n_electors = utils.get_n_electors(
        node_elements, df_year['n_electors'].reset_index(drop=True))

    node_text = utils.get_node_text(
        dict(enumerate(node_elements)),
        node_n_electors,
        node_color,
        'Percentage of Electors Won by Republicans')

    plotly_kwargs = {
        'node_trace_marker_colorscale': 'RdBu',
        'node_trace_marker_reversescale': True,
        'node_trace_marker_cmin': 0,
        'node_trace_marker_cmax': 100,
        'node_trace_text': node_text,
        'node_trace_marker_size': node_n_electors,
        'node_trace_marker_sizeref': .5 / max(node_n_electors)
    }

    fig = plot_static_mapper_graph(pipeline, data, 'kk', layout_dim=2,
                                   node_color_statistic=node_color,
                                   color_by_columns_dropdown=True,
                                   plotly_kwargs=plotly_kwargs)

    # Reuse previously stored line and marker positions if provided
    if keep_layout is not None:
        fig._data[0].update(keep_layout[0])
        fig._data[1].update(keep_layout[1])
    return fig
def test_color_data_invalid_length():
    """`color_data` with one row fewer than the data raises ValueError."""
    pipe = make_mapper_pipeline()
    with pytest.raises(ValueError):
        truncated_color_data = X_arr[:-1]
        plot_static_mapper_graph(
            pipe, X_arr, color_data=truncated_color_data
        )
def test_invalid_type_node_color_statistic_static():
    """A string passed as `node_color_statistic` raises ValueError."""
    pipe = make_mapper_pipeline()
    invalid_statistic = "foo"
    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            pipe, X_arr, node_color_statistic=invalid_statistic
        )
def test_invalid_color_features_types(color_features):
    """Each parametrized `color_features` value must raise ValueError."""
    pipe = make_mapper_pipeline()
    with pytest.raises(ValueError):
        plot_static_mapper_graph(
            pipe,
            X_arr,
            color_features=color_features,
        )