Exemplo n.º 1
0
def grouped_violinplot(data1, data2, normalize=False, group_names=['data1', 'data2'],
                       filename=None):
    """
    Draw a split violin plot that compares two datasets variable by variable.

    Both DataFrames are handed to ``_get_grouped_data`` together with
    ``normalize`` and ``group_names``; the result is expected to be long-form
    data exposing the columns 'columns', 'value' and 'set', which are used as
    the x axis, y axis and hue of the violin plot respectively.

    Parameters
    ----------
    data1: DataFrame
        Data set, columns = variable names.  Its column order fixes the order
        of the violins along the x axis.
    data2: DataFrame
        Data set, columns = variable names
    normalize : bool, optional
        Normalize both datasets by the median and standard deviation of data1
        (the normalization itself is performed by ``_get_grouped_data``)
    group_names : list, optional
        Names used in the legend
    filename: string, optional
        Filename used to save the figure.  When omitted the figure is shown
        interactively; otherwise it is saved and then closed.
    """
    # Argument validation, in the same order as the parameters
    assert isinstance(data1, pd.DataFrame)
    assert isinstance(data2, pd.DataFrame)
    assert isinstance(normalize, bool)
    assert isinstance(group_names, list)
    assert isinstance(filename, (type(None), str))

    long_form = _get_grouped_data(data1, data2, normalize, group_names)

    plt.figure()
    sns.violinplot(x='columns', y='value', hue='set', data=long_form,
                   order=data1.columns, split=True)

    # Strip the legend title and both axis labels
    axes = plt.gca()
    axes.legend().set_title('')
    axes.set_xlabel('')
    axes.set_ylabel('')

    if filename is not None:
        plt.savefig(filename)
        plt.close()
    else:
        plt.show()
Exemplo n.º 2
0
    def visualize_model_graph(self,
                              type_of_graph='constraint',
                              filename=None,
                              pos=None):
        """
        Draw a graph of the communities for a Pyomo model.

        The type_of_graph parameter is used to create either a variable-node graph, constraint-node graph, or
        bipartite graph of the Pyomo model. Then, the nodes are colored based on the communities they are in - which
        is based on the community map (self.community_map). A node that appears in several communities is colored
        according to the first community (in community-map order) that contains it. A filename can be provided to
        save the figure, otherwise the figure is illustrated with matplotlib.

        Parameters
        ----------
        type_of_graph: str, optional
            a string that specifies the types of nodes drawn on the model graph, the default is 'constraint'.
            'constraint' draws a graph with constraint nodes,
            'variable' draws a graph with variable nodes,
            'bipartite' draws a bipartite graph (with both constraint and variable nodes)
        filename: str, optional
            a string that specifies a path for the model graph illustration to be saved
        pos: dict, optional
            a dictionary that maps node keys to their positions on the illustration

        Returns
        -------
        fig: matplotlib figure
            the figure for the model graph drawing (already closed if it was saved to filename)
        pos: dict
            a dictionary that maps node keys to their positions on the illustration - can be used to create consistent
            layouts for graphs of a given model

        Raises
        ------
        AssertionError
            if type_of_graph is not one of the valid values or filename is neither None nor a string
        """

        # Check that all arguments are of the correct type.  The offending value is wrapped in a 1-tuple so
        # that building the message cannot itself fail (a bare tuple operand would be unpacked by '%').
        assert type_of_graph in ('bipartite', 'constraint', 'variable'), \
            "Invalid graph type specified: 'type_of_graph=%s' - Valid values: " \
            "'bipartite', 'constraint', 'variable'" % (type_of_graph,)

        assert isinstance(filename, (type(None), str)), \
            "Invalid value for filename: 'filename=%s' - filename " \
            "must be a string" % (filename,)

        # No assert statement for pos; the NetworkX function can handle issues with the pos argument

        # There is a possibility that the desired networkX graph of the model is already stored in the
        # CommunityMap object (because the networkX graph is required to create the CommunityMap object)
        if type_of_graph != self.type_of_community_map:
            # Use the generate_model_graph function to create a NetworkX graph of the given model (along with
            # number_component_map and constraint_variable_map, which will be used to help with drawing the graph)
            model_graph, number_component_map, constraint_variable_map = generate_model_graph(
                self.model,
                type_of_graph=type_of_graph,
                with_objective=self.with_objective,
                weighted_graph=self.weighted_graph,
                use_only_active_components=self.use_only_active_components)
        else:
            # Reuse the networkX graph that was made to create the CommunityMap object
            model_graph = self.graph
            number_component_map = self.graph_node_mapping
            constraint_variable_map = self.constraint_variable_map

        # "Reverse" of number_component_map: mapping the Pyomo components to their nodes in the
        # networkX graph is more convenient in this function
        component_number_map = ComponentMap(
            (comp, number) for number, comp in number_component_map.items())

        # Translate the Pyomo components in community_map into the numbers of their nodes in model_graph.
        # A fresh dict is built, so self.community_map is never modified (and no deep copy is needed).
        numbered_community_map = {}
        for key in self.community_map:
            constraint_components = self.community_map[key][0]
            variable_components = self.community_map[key][1]
            numbered_community_map[key] = (
                [component_number_map[component] for component in constraint_components],
                [component_number_map[component] for component in variable_components])

        # Select, per community, the nodes that are drawn for this type of graph.  Constraints are in the
        # first list of each tuple in the community map and variables are in the second list.
        if type_of_graph == 'bipartite':
            community_members = {
                key: node_lists[0] + node_lists[1]
                for key, node_lists in numbered_community_map.items()
            }
        else:
            position = 0 if type_of_graph == 'constraint' else 1
            community_members = {
                key: node_lists[position]
                for key, node_lists in numbered_community_map.items()
            }

        # Flatten the per-community lists into the one-dimensional node list that will be drawn
        node_list = [
            node for members in community_members.values() for node in members
        ]

        # Certain nodes may appear in multiple communities; preference is given to the first community a
        # node appears in.  Recording that community once per node avoids rescanning every community list
        # for every node.
        first_community = {}
        for community_key, members in community_members.items():
            for node in members:
                first_community.setdefault(node, community_key)
        color_list = [first_community[node] for node in node_list]

        if pos is None:  # The case where the user has not provided their own layout
            if type_of_graph == 'bipartite':
                # Find top_nodes (one of the two "groups" of nodes in a bipartite graph), which is used to
                # determine the graph layout
                if model_graph.number_of_nodes() > 0 and nx.is_connected(
                        model_graph):
                    # An index of 1 used because this tends to place constraint nodes on the left, which is
                    # consistent with the else case
                    top_nodes = nx.bipartite.sets(model_graph)[1]
                else:
                    top_nodes = {
                        node
                        for node in model_graph.nodes()
                        if node in constraint_variable_map
                    }
                pos = nx.bipartite_layout(model_graph, top_nodes)
            else:
                # Note - there is no strong reason to choose spring layout; it just creates relatively
                # clean graphs
                pos = nx.spring_layout(model_graph)

        # Define color_map with one color per community.  pyplot.get_cmap is used because
        # matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9.
        color_map = plt.get_cmap('viridis', len(numbered_community_map))

        # Create the figure and draw the graph
        fig = plt.figure()
        nx.draw_networkx_nodes(model_graph,
                               pos,
                               nodelist=node_list,
                               node_size=40,
                               cmap=color_map,
                               node_color=color_list)
        nx.draw_networkx_edges(model_graph, pos, alpha=0.5)

        # Make the main title
        graph_type = type_of_graph.capitalize()
        community_map_type = self.type_of_community_map.capitalize()
        main_graph_title = "%s graph - colored using %s community map" % (
            graph_type, community_map_type)

        main_font_size = 14
        plt.suptitle(main_graph_title, fontsize=main_font_size)

        # Define a dict that will be used for the graph subtitle
        subtitle_naming_dict = {
            'bipartite':
            'Nodes are variables and constraints & Edges are variables in a constraint',
            'constraint': 'Nodes are constraints & Edges are common variables',
            'variable': 'Nodes are variables & Edges are shared constraints'
        }

        # Make the subtitle
        subtitle_font_size = 11
        plt.title(subtitle_naming_dict[type_of_graph],
                  fontsize=subtitle_font_size)

        if filename is None:
            plt.show()
        else:
            # Act on this figure explicitly rather than on whatever pyplot considers current
            fig.savefig(filename)
            plt.close(fig)

        # Return the figure and pos, the position dictionary used for the graph layout
        return fig, pos
Exemplo n.º 3
0
def pairwise_plot(theta_values, theta_star=None, alpha=None, distributions=[],
                  axis_limits=None, title=None, add_obj_contour=True,
                  add_legend=True, filename=None):
    """
    Plot pairwise relationship for theta values, and optionally alpha-level
    confidence intervals and objective value contours

    Parameters
    ----------
    theta_values: DataFrame or tuple

        * If theta_values is a DataFrame, then it contains one column for each theta variable
          and (optionally) an objective value column ('obj') and columns that contains
          Boolean results from confidence interval tests (labeled using the alpha value).
          Each row is a sample.  Columns named 'obj' or labeled with an int or float
          are not treated as theta variables.

          * Theta variables can be computed from ``theta_est_bootstrap``,
            ``theta_est_leaveNout``, and  ``leaveNout_bootstrap_test``.
          * The objective value can be computed using the ``likelihood_ratio_test``.
          * Results from confidence interval tests can be computed using the
            ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and
            ``confidence_region_test``.

        * If theta_values is a tuple, then it contains a mean, covariance, and number
          of samples (mean, cov, n) where mean is a dictionary or Series
          (indexed by variable name), covariance is a DataFrame (indexed by
          variable name, one column per variable name), and n is an integer.
          The mean and covariance are used to create a multivariate normal
          sample of n theta values (drawn with a fixed random_state of 1).
          The covariance can be computed using ``theta_est(calc_cov=True)``.

    theta_star: dict or Series, optional
        Estimated value of theta.  The dictionary or Series is indexed by variable name.
        A DataFrame is also accepted, in which case the row labeled 0 is used.
        Theta_star is used to slice higher dimensional contour intervals in 2D
    alpha: float, optional
        Confidence interval value, if an alpha value is given and the
        distributions list is empty, the data will be filtered by True/False
        values using the column name whose value equals alpha (see results from
        ``leaveNout_bootstrap_test``, ``likelihood_ratio_test``, and
        ``confidence_region_test``)
    distributions: list of strings, optional
        Statistical distribution used to define a confidence region,
        options = 'MVN' for multivariate_normal, 'KDE' for gaussian_kde, and
        'Rect' for rectangular.
        Confidence interval is a 2D slice, using linear interpolation at theta_star.
        Regions are colored by their position in the list; the three available
        colors are reused cyclically if more than three entries are given.
    axis_limits: dict, optional
        Axis limits in the format {variable: [min, max]}
    title: string, optional
        Plot title
    add_obj_contour: bool, optional
        Add a contour plot using the column 'obj' in theta_values.
        Contour plot is a 2D slice, using linear interpolation at theta_star.
    add_legend: bool, optional
        Add a legend to the plot
    filename: string, optional
        Filename used to save the figure.  If omitted the figure is shown instead.
    """
    assert isinstance(theta_values, (pd.DataFrame, tuple))
    assert isinstance(theta_star, (type(None), dict, pd.Series, pd.DataFrame))
    assert isinstance(alpha, (type(None), int, float))
    assert isinstance(distributions, list)
    assert set(distributions).issubset({'MVN', 'KDE', 'Rect'})
    assert isinstance(axis_limits, (type(None), dict))
    assert isinstance(title, (type(None), str))
    assert isinstance(add_obj_contour, bool)
    assert isinstance(filename, (type(None), str))

    # If theta_values is a tuple containing (mean, cov, n), create a DataFrame of values
    if isinstance(theta_values, tuple):
        assert len(theta_values) == 3
        mean, cov, n = theta_values
        if isinstance(mean, dict):
            mean = pd.Series(mean)
        theta_names = mean.index
        mvn_dist = stats.multivariate_normal(mean, cov)
        theta_values = pd.DataFrame(mvn_dist.rvs(n, random_state=1), columns=theta_names)

    assert theta_values.shape[0] > 0

    if isinstance(theta_star, dict):
        theta_star = pd.Series(theta_star)
    if isinstance(theta_star, pd.DataFrame):
        theta_star = theta_star.loc[0, :]

    # Theta variables are every column except 'obj' and the numerically labeled
    # (alpha-level) result columns
    theta_names = [col for col in theta_values.columns
                   if (col not in ['obj']) and not isinstance(col, (float, int))]

    # Filter data by alpha
    if (alpha in theta_values.columns) and (len(distributions) == 0):
        thetas = theta_values.loc[theta_values[alpha] == True, theta_names]
    else:
        thetas = theta_values[theta_names]

    if theta_star is not None:
        theta_star = theta_star[theta_names]

    legend_elements = []

    g = sns.PairGrid(thetas)

    # Plot histogram on the diagonal
    # Note: distplot is deprecated and will be removed in a future
    #       version of seaborn, use histplot.  distplot is kept for older
    #       versions of python.
    if check_min_version(sns, "0.11"):
        g.map_diag(sns.histplot)
    else:
        g.map_diag(sns.distplot, kde=False, hist=True, norm_hist=False)

    # Plot filled contours using all theta values based on obj
    if 'obj' in theta_values.columns and add_obj_contour:
        g.map_offdiag(_add_obj_contour, columns=theta_names, data=theta_values,
                      theta_star=theta_star)

    # Plot thetas
    g.map_offdiag(plt.scatter, s=10)
    legend_elements.append(matplotlib.lines.Line2D(
        [0], [0], marker='o', color='w', label='thetas',
        markerfacecolor='cadetblue', markersize=5))

    # Plot theta*
    if theta_star is not None:
        g.map_offdiag(_add_scatter, color='k', columns=theta_names, theta_star=theta_star)

        legend_elements.append(matplotlib.lines.Line2D(
            [0], [0], marker='o', color='w', label='theta*',
            markerfacecolor='k', markersize=6))

    # Plot confidence regions
    colors = ['r', 'mediumblue', 'darkgray']
    if (alpha is not None) and (len(distributions) > 0):

        if theta_star is None:
            # Message text (including its embedded line break and indentation)
            # is kept exactly as emitted before
            print("theta_star is not defined, confidence region slice will be \n"
                  "                  plotted at the mean value of theta")
            theta_star = thetas.mean()

        for i, dist in enumerate(distributions):
            # Cycle through the palette so that a list with more than
            # len(colors) entries (e.g. repeated names) cannot index past it
            color = colors[i % len(colors)]

            if dist == 'Rect':
                lb, ub = fit_rect_dist(thetas, alpha)
                g.map_offdiag(_add_rectangle_CI, color=color, columns=theta_names,
                              lower_bound=lb, upper_bound=ub)
            else:
                if dist == 'MVN':
                    fitted_dist = fit_mvn_dist(thetas)
                    Z = fitted_dist.pdf(thetas)
                elif dist == 'KDE':
                    fitted_dist = fit_kde_dist(thetas)
                    # the KDE pdf is evaluated on the transposed samples
                    Z = fitted_dist.pdf(thetas.transpose())
                else:
                    # Unknown names are ignored (only reachable if the
                    # assertions above are disabled)
                    continue
                # The density value at the (1-alpha) percentile of the sample
                # densities is passed on as the contour level
                score = stats.scoreatpercentile(Z, (1 - alpha) * 100)
                g.map_offdiag(_add_scipy_dist_CI, color=color, columns=theta_names,
                              ncells=100, alpha=score, dist=fitted_dist,
                              theta_star=theta_star)

            legend_elements.append(matplotlib.lines.Line2D(
                [0], [0], color=color, lw=1, label=dist))

    _set_axis_limits(g, axis_limits, thetas, theta_star)

    for ax in g.axes.flatten():
        ax.ticklabel_format(style='sci', scilimits=(-2, 2), axis='both')

        if add_legend:
            xvar, yvar, loc = _get_variables(ax, theta_names)
            # The legend is attached to the bottom-left axes only
            if loc == (len(theta_names) - 1, 0):
                ax.legend(handles=legend_elements, loc='best', prop={'size': 8})
    if title:
        g.fig.subplots_adjust(top=0.9)
        g.fig.suptitle(title)

    # Work in progress
    # Plot lower triangle graphics in separate figures, useful for presentations
    # NOTE: permanently disabled by the hard-coded flag below
    lower_triangle_only = False
    if lower_triangle_only:
        for ax in g.axes.flatten():
            xvar, yvar, (xloc, yloc) = _get_variables(ax, theta_names)
            if xloc < yloc: # lower triangle
                ax.remove()

                ax.set_xlabel(xvar)
                ax.set_ylabel(yvar)

                fig = plt.figure()
                ax.figure = fig
                fig.axes.append(ax)
                fig.add_axes(ax)

                f, dummy = plt.subplots()
                bbox = dummy.get_position()
                ax.set_position(bbox)
                dummy.remove()
                plt.close(f)

                ax.tick_params(reset=True)

                if add_legend:
                    ax.legend(handles=legend_elements, loc='best', prop={'size': 8})

        plt.close(g.fig)

    if filename is None:
        plt.show()
    else:
        plt.savefig(filename)
        plt.close()
Exemplo n.º 4
0
def make3dPlot(expr, numticks=30, show_plot=False):
    """
    Sample the relaxation object ``mc(expr)`` on a grid over two variables.

    The first two variables yielded by ``identify_variables(expr)`` are taken
    as x and y.  Each is swept from its lower bound to its upper bound in
    ``numticks`` equal steps, giving ``(numticks + 1)**2`` grid points.  The
    result for grid point (i, j) is stored at flat index
    ``i + (numticks + 1) * j``.

    Parameters
    ----------
    expr:
        Expression passed to ``mc``, ``identify_variables`` and ``value``
    numticks: int, optional
        Number of steps along each axis
    show_plot: bool, optional
        If True, show a 3D scatter plot of the sampled values

    Returns
    -------
    ccSurf, cvSurf: list
        Values of ``concave()`` / ``convex()`` at every grid point
    ccAffine, cvAffine: list
        Values at every grid point of the affine planes built from the
        ``subcc()`` / ``subcv()`` slopes and the ``concave()`` / ``convex()``
        values taken at the variables' initial values
    """
    stride = numticks + 1
    npoints = stride**2
    ccSurf = [None] * npoints
    cvSurf = [None] * npoints
    fvals = [None] * npoints
    xaxis2d = [None] * npoints
    yaxis2d = [None] * npoints
    ccAffine = [None] * npoints
    cvAffine = [None] * npoints

    eqn = mc(expr)
    var_iter = identify_variables(expr)
    x = next(var_iter)
    y = next(var_iter)
    x_step = (x.ub - x.lb) / numticks
    y_step = (y.ub - y.lb) / numticks
    xaxis = [x.lb + x_step * k for k in range(stride)]
    yaxis = [y.lb + y_step * k for k in range(stride)]

    # Ingredients of the affine tangent planes, captured before the
    # evaluation point is moved around the grid
    ccSlope = eqn.subcc()
    cvSlope = eqn.subcv()
    x_val = value(x)
    y_val = value(y)
    f_cc = eqn.concave()
    f_cv = eqn.convex()

    for i, x_tick in enumerate(xaxis):
        eqn.changePoint(x, x_tick)
        for j, y_tick in enumerate(yaxis):
            idx = i + stride * j
            dx = x_tick - x_val
            dy = y_tick - y_val

            # Affine planes evaluated at this grid point
            ccAffine[idx] = ccSlope[x] * dx + ccSlope[y] * dy + f_cc
            cvAffine[idx] = cvSlope[x] * dx + cvSlope[y] * dy + f_cv

            xaxis2d[idx] = x_tick
            yaxis2d[idx] = y_tick

            # Move the evaluation point, then sample the surfaces there
            eqn.changePoint(y, y_tick)
            ccSurf[idx] = eqn.concave()
            cvSurf[idx] = eqn.convex()
            fvals[idx] = value(expr)

    if show_plot:
        from mpl_toolkits.mplot3d import Axes3D
        assert Axes3D  # silence pyflakes

        # 3D scatter: sampled surfaces in blue, expression values in red
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1, projection='3d')
        ax.scatter(xaxis2d, yaxis2d, cvSurf, color='b')
        ax.scatter(xaxis2d, yaxis2d, fvals, color='r')
        ax.scatter(xaxis2d, yaxis2d, ccSurf, color='b')

        # Overlay the affine plane built from the cv slopes (cvAffine)
        ax.scatter(xaxis2d, yaxis2d, cvAffine, color='k')

        # Create a better view
        ax.view_init(10, 270)
        plt.show()

    return ccSurf, cvSurf, ccAffine, cvAffine