Пример #1
0
def plot_joint_distribution_benefits_surplus(model_dict, df):
    """Draw a seaborn joint plot of surplus (x-axis) against benefits (y-axis).

    Benefits are ``Y1 - Y0``. Costs are the ``model_dict['COST']['all']``
    coefficients applied to the ``Z_0`` and ``Z_1`` columns, plus the ``UC``
    column. Surplus is benefits minus costs.
    """
    cost_coeffs = model_dict['COST']['all']
    cost_covariates = df[['Z_0', 'Z_1']]

    benefits = df['Y1'] - df['Y0']
    costs = np.dot(cost_coeffs, cost_covariates.T) + df['UC']
    surplus = benefits - costs

    grid = sns.jointplot(surplus, benefits, stat_func=None)
    grid.set_axis_labels('$S$', r'$B$')
Пример #2
0
def plot_pop_resids(msm, **kwargs):
    """
    Plot residuals between MSM populations and raw counts.

    Parameters
    ----------
    msm : msmbuilder.msm
        MSMBuilder MarkovStateModel. It must expose ``countsmat_`` and either
        ``all_populations_`` (averaged over its first axis) or
        ``populations_``.
    **kwargs : dict, optional
        Extra arguments to pass to seaborn.jointplot

    Returns
    -------
    ax : seaborn.JointGrid
        The grid returned by ``seaborn.jointplot``; the joint axes are
        available as ``ax.ax_joint``.

    Raises
    ------
    AttributeError
        If ``msm`` has neither ``all_populations_`` nor ``populations_``.

    """
    if hasattr(msm, 'all_populations_'):
        # Read the attribute that was actually tested for; the previous code
        # checked ``all_populations_`` but averaged ``populations_``.
        msm_pop = msm.all_populations_.mean(0)
    elif hasattr(msm, 'populations_'):
        msm_pop = msm.populations_
    else:
        # Fail with a clear message instead of an unbound local further down.
        raise AttributeError(
            "msm has neither 'all_populations_' nor 'populations_'; "
            "has the model been fit?")

    # Fraction of all observed counts that leave each state.
    raw_pop = msm.countsmat_.sum(1) / msm.countsmat_.sum()
    ax = sns.jointplot(np.log10(raw_pop),
                       np.log10(msm_pop),
                       kind='resid',
                       **kwargs)
    ax.ax_joint.set_xlabel('Raw Populations', size=20)
    ax.ax_joint.set_ylabel('Residuals', size=20)

    return ax
Пример #3
0
 def init_artists(self, ax, plot_data, plot_kwargs):
     """Create the initial seaborn artist and return it in a handle dict.

     Non-joint plots draw a ``kdeplot`` on ``ax`` and are returned under
     ``'axis'``. Joint plots build a ``jointplot`` and return its figure
     under ``'fig'``; they are rejected when ``self.subplot`` is set.
     """
     if not self.joint:
         return {'axis': sns.kdeplot(*plot_data, ax=ax, **plot_kwargs)}

     if self.subplot:
         raise Exception("Joint plots can't be animated or laid out in a grid.")

     joint_grid = sns.jointplot(*plot_data, **plot_kwargs)
     return {'fig': joint_grid.fig}
Пример #4
0
 def init_artists(self, ax, plot_data, plot_kwargs):
     """Build the first artist for this plot and wrap it in a handle dict.

     Returns ``{'axis': ...}`` holding the ``kdeplot`` result when
     ``self.joint`` is falsy, otherwise ``{'fig': ...}`` holding the figure
     of a new ``jointplot``. A joint plot combined with ``self.subplot``
     raises.
     """
     if not self.joint:
         kde_axis = sns.kdeplot(*plot_data, ax=ax, **plot_kwargs)
         return {'axis': kde_axis}

     if self.subplot:
         raise Exception("Joint plots can't be animated or laid out in a grid.")

     return {'fig': sns.jointplot(*plot_data, **plot_kwargs).fig}
Пример #5
0
 def _update_plot(self, axis, view):
     """Redraw the plot for ``view``.

     Joint mode drops any ``cmap`` entry from ``self.style``, draws a
     ``jointplot`` of the first two columns of ``view.data`` and stores its
     figure in ``self.handles['fig']``. Otherwise a ``kdeplot`` is drawn on
     ``axis``, labelled only when ``self.overlaid == 1``.
     """
     if not self.joint:
         if self.overlaid == 1:
             label = view.label
         else:
             label = ''
         sns.kdeplot(view.data, ax=axis, label=label,
                     zorder=self.zorder, **self.style)
         return

     # Discard 'cmap' before the style is forwarded to jointplot.
     self.style.pop('cmap', None)
     joint_grid = sns.jointplot(view.data[:, 0], view.data[:, 1],
                                **self.style)
     self.handles['fig'] = joint_grid.fig
Пример #6
0
 def _update_plot(self, axis, view):
     """Draw ``view`` either as a joint plot or as a KDE on ``axis``.

     When ``self.joint`` is set, ``cmap`` is removed from ``self.style`` and
     the figure of a new ``jointplot`` (columns 0 and 1 of ``view.data``) is
     saved under ``self.handles['fig']``. Otherwise ``kdeplot`` is called,
     with ``view.label`` as the label only if ``self.overlaid == 1``.
     """
     if self.joint:
         # 'cmap' is removed so it is not passed on to jointplot.
         self.style.pop('cmap', None)
         grid = sns.jointplot(view.data[:, 0], view.data[:, 1], **self.style)
         self.handles['fig'] = grid.fig
         return

     label = view.label if self.overlaid == 1 else ''
     sns.kdeplot(view.data,
                 ax=axis,
                 label=label,
                 zorder=self.zorder,
                 **self.style)
Пример #7
0
 def _update_plot(self, axis, view):
     """Redraw ``view`` as a joint plot or as a KDE on ``axis``.

     In joint mode ``cmap`` is dropped from ``self.style`` and the figure of
     a new ``jointplot`` is stored in ``self.handles['fig']``. Otherwise the
     style entry at ``self.cyclic_index`` is used as keyword arguments for
     ``kdeplot``; a non-empty ``view.label`` is written into that entry when
     ``self.overlaid >= 1``.
     """
     if not self.joint:
         plot_opts = self.style[self.cyclic_index]
         legend_text = view.label if self.overlaid >= 1 else ''
         if legend_text:
             # NOTE(review): this writes into the object returned by
             # self.style[...]; whether that is shared state is not visible here.
             plot_opts['label'] = legend_text
         sns.kdeplot(view.data, ax=axis, zorder=self.zorder, **plot_opts)
         return

     self.style.pop('cmap', None)
     grid = sns.jointplot(view.data[:, 0], view.data[:, 1], **self.style)
     self.handles['fig'] = grid.fig
Пример #8
0
def vector_cloud_kde(experiment, kinematic, i=None):
    """Draw pairwise KDE joint plots of one kinematic vector's components.

    Three seaborn joint plots are produced from the columns
    ``<kinematic>_x``, ``<kinematic>_y`` and ``<kinematic>_z``: x vs y,
    x vs z and y vs z.

    Parameters
    ----------
    experiment : object
        Must expose ``is_simulation`` and
        ``flights.get_trajectory_slice(i)``; the latter's result is indexed
        with ``.loc[:, labels]``.
    kinematic : str
        Column-name prefix. When ``experiment.is_simulation is False`` only
        ``'position'``, ``'velocity'`` and ``'acceleration'`` are accepted.
    i : optional
        Passed through unchanged to ``get_trajectory_slice``.

    Raises
    ------
    TypeError
        If the experiment is not a simulation and ``kinematic`` is not one
        of the three accepted names.
    """
    # test whether we are a simulation; if not, forbid plotting of drivers
    if experiment.is_simulation is False:
        if kinematic not in ['position', 'velocity', 'acceleration']:
            raise TypeError("we don't know the mosquito drivers")

    labels = [kinematic + '_' + dim for dim in ('x', 'y', 'z')]

    ensemble = experiment.flights.get_trajectory_slice(i)

    # Compare by value: ``is`` only matched when the caller's string happened
    # to be the same object as the literal, silently dropping the limits.
    if kinematic == "velocity":
        xlim, ylim = (-0.8, 0.8), (-0.8, 0.8)
    else:
        xlim, ylim = None, None

    selection = ensemble.loc[:, labels]

    # (x, y), (x, z), (y, z), in the same order as before.
    for x_idx, y_idx in ((0, 1), (0, 2), (1, 2)):
        sns.jointplot(x=labels[x_idx], y=labels[y_idx], data=selection,
                      kind='kde', shade=True, xlim=xlim, ylim=ylim,
                      shade_lowest=False, space=0, stat_func=None)
Пример #9
0
def plot_distribution(df,
                      experiment_type,
                      color="C0",
                      title=None,
                      target_color="C2",
                      background_shade=-50,
                      crosshair=False,
                      drone_width=None):
    """Plot a KDE joint plot of ``xn``/``yn`` for arrived runs of one type.

    Rows of ``df`` whose ``experiment_type`` matches and whose ``arrived``
    equals 1 are plotted. Rug plots are added to the marginals, the target
    is drawn through ``plot_targets``, and a large background rectangle is
    added behind everything. The figure title defaults to the first
    ``experiment`` value among the rows of the requested type.
    """
    of_type = df[df.experiment_type == experiment_type]
    arrivals = of_type[of_type.arrived == 1]
    if title is None:
        title = of_type.experiment.iloc[0]

    grid = sns.jointplot(arrivals.xn,
                         arrivals.yn,
                         kind="kde",
                         space=0,
                         color=color)
    grid.plot_marginals(sns.rugplot, height=0.1, color=color)

    joint_ax = grid.ax_joint
    # Push the first child artist behind the rest, then make any remaining
    # path collections slightly transparent.
    joint_ax.get_children()[0].set_zorder(-1)
    for artist in joint_ax.get_children()[1:]:
        if isinstance(artist, mpl.collections.PathCollection):
            artist.set_alpha(0.8)

    plt.sca(joint_ax)
    plot_targets(show_start=False,
                 show_final=False,
                 target_coords=[TARGET],
                 target_color=target_color,
                 zorder=0,
                 crosshair=crosshair,
                 drone_width=drone_width)
    grid.set_axis_labels("$x$", "$y$")
    plt.axis("equal")

    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    xsize, ysize = xmax - xmin, ymax - ymin

    # Grow the shorter extent symmetrically until both extents match.
    if xsize > ysize:
        pad = (xsize - ysize) / 2
        ymin, ymax = ymin - pad, ymax + pad
        ysize = xsize
    else:
        pad = (ysize - xsize) / 2
        xmin, xmax = xmin - pad, xmax + pad
        xsize = ysize

    # Oversized square anchored below/left of the smaller lower bound.
    lowest = min(xmin, ymin)
    corner = lowest - abs(lowest)
    side = max(xsize, ysize) * 5
    backdrop = mpl.patches.Rectangle(
        (corner, corner),
        side,
        side,
        color=change_color(color, saturation=background_shade),
        zorder=-1)
    plt.gca().add_patch(backdrop)
    grid.fig.suptitle(title)
Пример #10
0
def plot_pop_resids(msm, **kwargs):
    """Plot residuals between MSM populations and raw count fractions.

    Parameters
    ----------
    msm : object
        Model exposing ``countsmat_`` and either ``all_populations_``
        (averaged over its first axis) or ``populations_``.
    **kwargs : dict, optional
        Extra keyword arguments forwarded to ``seaborn.jointplot``.

    Returns
    -------
    ax : object
        Whatever ``seaborn.jointplot`` returns; its ``ax_joint`` axes carry
        the axis labels set here.

    Raises
    ------
    AttributeError
        If ``msm`` has neither ``all_populations_`` nor ``populations_``.
    """
    if hasattr(msm, 'all_populations_'):
        # Use the attribute that was tested for, not ``populations_``.
        msm_pop = msm.all_populations_.mean(0)
    elif hasattr(msm, 'populations_'):
        msm_pop = msm.populations_
    else:
        # Previously this fell through and failed later on an unbound local.
        raise AttributeError(
            "msm has neither 'all_populations_' nor 'populations_'; "
            "has the model been fit?")

    raw_pop = msm.countsmat_.sum(1) / msm.countsmat_.sum()
    ax = sns.jointplot(np.log10(raw_pop), np.log10(msm_pop), kind='resid',
                       **kwargs)
    ax.ax_joint.set_xlabel('Raw Populations', size=20)
    ax.ax_joint.set_ylabel('Residuals', size=20)

    return ax
Пример #11
0
def jointplot(x=None, y=None, data=None, xlabel=None, ylabel=None,
              color=None, zeromin=True, one2one=True):
    """ Plots the joint distribution of two variables via seaborn

    Parameters
    ----------
    x, y : array-like or string
        Sequences of values or column names found within ``data``.
    data : pandas DataFrame or None, optional
        An optional DataFrame containing the data.
    xlabel, ylabel : string, optional
        Overrides the default x- and y-axis labels.
    color : matplotlib color, optional
        Color used for the plot elements.
    zeromin : bool, optional
        When True (default), force lower axes limits to 0.
    one2one : bool, optional
        When True (default), plots the 1:1 line on the axis and sets
        the x- and y-axis limits to be equal. The shared lower limit is 0
        when ``zeromin`` is True, otherwise the smaller of the current
        x/y limits.

    Returns
    -------
    jg : seaborn.JointGrid

    """
    jg = seaborn.jointplot(x=x, y=y, color=color, data=data,
                           marginal_kws=dict(rug=True, kde=True))

    if xlabel is None:
        xlabel = jg.ax_joint.get_xlabel()

    if ylabel is None:
        ylabel = jg.ax_joint.get_ylabel()

    jg.set_axis_labels(xlabel=xlabel, ylabel=ylabel)

    if zeromin:
        jg.ax_joint.set_xlim(left=0)
        jg.ax_joint.set_ylim(bottom=0)

    if one2one:
        limits = [jg.ax_joint.get_xlim(), jg.ax_joint.get_ylim()]
        ax_limit_max = np.max(limits)
        # Previously the lower limit was hard-coded to 0 here, which made
        # ``zeromin=False`` a no-op whenever ``one2one`` was on.
        ax_limit_min = 0 if zeromin else np.min(limits)
        jg.ax_joint.set_xlim(left=ax_limit_min, right=ax_limit_max)
        jg.ax_joint.set_ylim(bottom=ax_limit_min, top=ax_limit_max)
        jg.ax_joint.plot([ax_limit_min, ax_limit_max],
                         [ax_limit_min, ax_limit_max],
                         marker='None', linestyle='-', linewidth=1.75,
                         color=color or 'k', alpha=0.45, label='1:1 line')

        jg.ax_joint.legend(frameon=False, loc='upper left')

    return jg
Пример #12
0
def graph_regression_numerical(dataset_id, df, col, target):
    """
    display a reg scatter plot graph of col in x axis and target in y axis

    The graph is rendered twice, once per matplotlib style
    ('dark_background' and 'seaborn-whitegrid'), and each rendering is
    handed to ``__save_fig``. Failures are logged, not raised.

    :param dataset_id: id of the dataset
    :param df: dataframe, with col and target values
    :param col: name of column
    :param target: name of target column
    :return: None
    """
    try:
        for dark, theme in [(True, 'dark_background'),
                            (False, 'seaborn-whitegrid')]:
            with plt.style.context(theme, after_reset=True):
                g = sns.jointplot(x=col, y=target, data=df, kind="kde", size=7)
                g.plot_joint(plt.scatter, s=5, alpha=0.7)
                # Make the first collection on the joint axes fully transparent
                # (presumably the KDE layer, leaving the scatter visible).
                g.ax_joint.collections[0].set_alpha(0)
                # NOTE(review): __standard_range(values, 1, 99) looks like a
                # percentile-based range; confirm against its definition.
                plt.xlim(__standard_range(df[col].values, 1, 99))
                plt.ylim(__standard_range(df[target].values, 1, 99))
                __save_fig(dataset_id, '_col_' + col, dark)
    except Exception:
        # Best-effort plotting: log and continue. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        log.error('error in graph_regression_numerical with dataset_id %s' %
                  dataset_id)
Пример #13
0
def graph_predict_regression(dataset, round_id, y, y_pred, part='eval'):
    """
    generate a graph prediction versus actuals

    The graph is rendered twice, once per matplotlib style
    ('dark_background' and 'seaborn-whitegrid'), and each rendering is
    handed to ``__save_fig``. Failures are logged, not raised.

    :param dataset: dataset object
    :param round_id: id of the round
    :param y: actual values
    :param y_pred: predicted values
    :param part: part of the dataset
    :return: None
    """
    try:
        for dark, theme in [(True, 'dark_background'),
                            (False, 'seaborn-whitegrid')]:
            with plt.style.context(theme, after_reset=True):
                plt.figure(figsize=(6, 6))
                # plot a graph prediction versus actuals
                df = pd.DataFrame([y, y_pred]).transpose()
                df.columns = ['actuals', 'predict']
                g = sns.jointplot(x='actuals',
                                  y='predict',
                                  data=df,
                                  kind="kde",
                                  size=6)
                g.plot_joint(plt.scatter, s=5, alpha=0.8)
                # Make the first collection on the joint axes fully transparent
                # (presumably the KDE layer, leaving the scatter visible).
                g.ax_joint.collections[0].set_alpha(0)
                # NOTE(review): __standard_range(y, 1, 99) looks like a
                # percentile-based range; confirm against its definition.
                mn, mx = __standard_range(y, 1, 99)
                # Diagonal reference line: prediction == actual.
                plt.plot((mn, mx), (mn, mx), color='r', lw=0.7)
                plt.xlim(mn, mx)
                plt.ylim(mn, mx)
                plt.title('%s' % part)
                __save_fig(dataset.dataset_id,
                           'predict_%s_%s' % (part, round_id), dark)
    except Exception:
        # Best-effort plotting: log and continue. ``Exception`` (not a bare
        # ``except``) lets KeyboardInterrupt and SystemExit propagate.
        log.error('error in graph_predict_regression with dataset_id %s' %
                  dataset.dataset_id)
Пример #14
0
def plottingData():
    """Plot one or two columns of the tab-separated file named by ``args.data``.

    Reads the module-level ``args`` object (it is not a parameter):

    * ``args.option`` without a comma: line plot of that column against the
      ``#Frame`` column.
    * ``args.option`` of the form ``a,b`` with ``args.pdf`` falsy: scatter of
      ``a`` vs ``b`` coloured by a gaussian kernel density estimate.
    * ``args.option`` of the form ``a,b`` with ``args.pdf == 'kde'``: seaborn
      KDE joint plot of ``a`` vs ``b``.

    Any other combination draws nothing. Surrounding double quotes are
    stripped from the two column names.
    """
    # ``sep`` and ``delimiter`` are aliases; pass only one of them.
    # os.path.join also keeps an absolute ``args.data`` intact.
    df = pd.read_csv(os.path.join(os.getcwd(), args.data),
                     sep='\t',
                     header=0)

    has_pair = ',' in args.option

    if has_pair and not args.pdf:

        # Split the option and delete quotes. str.strip returns a new string,
        # so the result has to be kept (it used to be discarded).
        numVar = [name.strip('"') for name in args.option.split(',')]
        fig, (ax1) = plt.subplots(nrows=1)

        # Color by the Probability Density Function.
        # Kernel density estimation is a way to estimate
        # the probability density function (PDF) of a random
        # variable in a non-parametric way

        # Setting data (plain arrays so the reordering below is positional)
        x = df[numVar[0]].values
        y = df[numVar[1]].values

        # Calculate the point density
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)

        # Sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]

        # Setting plot type; 'none' is the documented spelling for "no edge"
        pdf = ax1.scatter(x, y, c=z, s=50, edgecolor='none')

        # Plot title
        ax1.set_title(numVar[0] + ' by ' + numVar[1])

        # Hide right and top spines
        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')

        # Set x and y limits, padded by one unit on each side
        xmin = df[numVar[0]].min() - 1
        xmax = df[numVar[0]].max() + 1
        ymin = df[numVar[1]].min() - 1
        ymax = df[numVar[1]].max() + 1
        plt.xlim(xmin, xmax)
        plt.ylim(ymin, ymax)

        # Set x and y labels
        plt.xlabel(numVar[0])
        plt.ylabel(numVar[1])

        # Adding the color bar
        colbar = plt.colorbar(pdf)
        colbar.set_label('Probability Density Function')
        plt.show()

    elif not has_pair:

        fig, (ax1) = plt.subplots(nrows=1)
        ax1.plot(df['#Frame'], df[args.option])
        ax1.set_title(args.option + ' by Time')
        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')
        plt.xlabel('Time (ps)')
        xmin1 = df['#Frame'].min() - 1
        xmax1 = df['#Frame'].max() + 1
        plt.xlim(xmin1, xmax1)
        plt.ylabel(args.option)
        plt.show()

    elif args.pdf == 'kde':

        try:
            import seaborn.apionly as sns
        except ImportError:
            # Fallback for seaborn releases that do not ship ``apionly``.
            import seaborn as sns
        sns.set(style='white')
        numVar = [name.strip('"') for name in args.option.split(',')]

        # Distribution plot of two variables using KDE method with seaborn
        sns.jointplot(x=numVar[0],
                      y=numVar[1],
                      data=df,
                      kind="kde",
                      space=0,
                      color="b")
        plt.show()
Пример #15
0
    def jointplot(self, x=None, y=None, data=None, *args, **kwargs):
        """
        Draw a seaborn joint plot for every (x, y) column pair of ``data``.

        For each pair, rows where either column is NaN or infinite are
        dropped before plotting (a warning is logged when that happens).
        All figures are shown with a single ``plt.show()`` at the end.

        Parameters
        ----------
        x : column name, or list/tuple/array/Index of column names in
            ``data`` whose distribution should be visualized

        y : column name, or list/tuple/array/Index of column names in
            ``data`` to plot jointly against every x above

        data : pandas dataframe

        *args, **kwargs : forwarded to seaborn.jointplot, e.g.

            kind : { 'scatter' | 'reg' | 'resid' | 'kde' | 'hex' }, optional

            stat_func : callable or None, optional

            color : matplotlib color, optional

            ratio : numeric, optional

            space : numeric, optional

            dropna : bool, optional

            {x, y}lim : two-tuples, optional

            {joint, marginal, annot}_kws : dicts, optional

            ``size`` must not be passed: it is always set from
            ``self.size[0]``.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If ``data`` is not a DataFrame, ``x`` or ``y`` is None, or a
            requested column is missing from ``data``.

        References
        ----------
        Seaborn jointplot further documentation
        https://seaborn.pydata.org/generated/seaborn.jointplot.html
        """
        # check data
        if not isinstance(data, pd.DataFrame):
            raise ValueError('data must be pandas dataframe')

        # check x and y; a single name is wrapped into a list
        if x is None:
            raise ValueError('x can NOT be None')
        if not isinstance(x, (list, tuple, np.ndarray, pd.Index)):
            x = [x]
        if y is None:
            raise ValueError('y can NOT be None')
        if not isinstance(y, (list, tuple, np.ndarray, pd.Index)):
            y = [y]

        # no figure configuration needed
        plt.close()
        # iterate thru y, then thru x for each y
        for col_y in y:
            if col_y not in data.columns.values:
                raise ValueError('{} is NOT in data'.format(col_y))
            b = data[col_y]
            # Builtin ``bool``: the ``np.bool`` alias is gone in current NumPy.
            b_not_nan = np.ones(b.shape[0], dtype=bool)
            if np.logical_not(np.isfinite(b)).any():
                logger.warning('RUNTIME WARNING: {} column has inf or nan '
                               ''.format(col_y))
                b = b.replace([-np.inf, np.inf], np.nan)
                # filter
                b_not_nan = np.logical_not(b.isnull())

            for col_x in x:
                # check if col in data
                if col_x not in data.columns.values:
                    raise ValueError('{} is NOT in data'.format(col_x))
                a = data[col_x]
                a_not_nan = np.ones(a.shape[0], dtype=bool)
                if np.logical_not(np.isfinite(a)).any():
                    logger.warning('RUNTIME WARNING: {} column has inf or '
                                   'nan'.format(col_x))
                    a = a.replace([-np.inf, np.inf], np.nan)
                    # filter
                    a_not_nan = np.logical_not(a.isnull())
                # joint filter: keep rows that are finite in both columns
                not_nan = b_not_nan & a_not_nan
                joint_grid = sns.jointplot(x=a[not_nan],
                                           y=b[not_nan],
                                           size=self.size[0],
                                           *args,
                                           **kwargs)

                # axes[0] carries the x/y labels; the title goes on axes[1]
                # (presumably the top marginal axes, so it sits above the plot).
                joint_grid.fig.axes[1].set_title(
                    label='Joint Distribution of {} and {} '
                    ''.format(col_y, col_x),
                    fontsize=self.title_fontsize)
                joint_grid.fig.axes[0].set_xlabel(xlabel=col_x,
                                                  fontsize=self.label_fontsize)
                joint_grid.fig.axes[0].set_ylabel(ylabel=col_y,
                                                  fontsize=self.label_fontsize)
                # 'major' is the valid spelling; 'maj' is not accepted.
                joint_grid.fig.axes[0].tick_params(
                    axis='both', which='major', labelsize=self.tick_fontsize)
                joint_grid.fig.axes[0].legend(loc='upper right')
                joint_grid.fig.subplots_adjust(wspace=0.5,
                                               hspace=0.3,
                                               left=0.125,
                                               right=0.9,
                                               top=0.9,
                                               bottom=0.1)
                joint_grid.fig.tight_layout()
        plt.show()
Пример #16
0
def plot_joint_distribution_unobservables(df):
    """Draw a seaborn joint plot of the ``V`` column against the ``U1`` column."""
    grid = sns.jointplot(df['V'], df['U1'], stat_func=None)
    grid.set_axis_labels('$V$', '$U_1$')
Пример #17
0
def plot_joint_distribution_potential(df):
    """Draw a seaborn joint plot of the ``Y1`` column against the ``Y0`` column."""
    grid = sns.jointplot(df['Y1'], df['Y0'], stat_func=None)
    grid.set_axis_labels('$Y_1$', r'$Y_0$')
Пример #18
0
plt.pie(med_age, labels=med_age.index, autopct='%1.1f%%')
plt.show()


# Pie chart of the 'Sex' value counts in peace_df. The wedges and the labels
# must come from the same series (the wedges used to come from ``counts``).
counts_peace = peace_df['Sex'].value_counts()
plt.pie(counts_peace, labels=counts_peace.index, autopct='%1.1f%%')
plt.show()

# Pie chart of the 'Age Category' value counts in peace_df.
peace_age = peace_df['Age Category'].value_counts()
print(peace_age)
plt.pie(peace_age, labels=peace_age.index, autopct='%1.1f%%')
plt.show()


# Age against Year: regression joint plot, box plot per Category, and a
# LOWESS-smoothed lmplot.
sns.jointplot(x="Year",
              y="Age",
              kind='reg',
              data=data)
plt.show()

sns.boxplot(data=data,
            x='Category',
            y='Age')
plt.show()

sns.lmplot(x='Year', y='Age', data=data, lowess=True, aspect=2,
           line_kws={'color': 'black'})
plt.show()


# Question 2: What words are most frequently written in the prize motivation?
top_N = 10
stopwords = nltk.corpus.stopwords.words('english')
Пример #19
0
 # plt.legend(loc = 'best')
 # plt.grid()
 # f.savefig('BPT.png')
 # plt.close(f)
 #EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
 
 #ax.autoscale(False)
 # 40x40 2-D histogram of the unmasked xm/ym values, plus its data extent.
 H, xedges, yedges = np.histogram2d(xm.compressed(), ym.compressed(), bins = (40,40))
 extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
 
 #f = plt.figure()
 #f.set_dpi(100)
 #f.set_size_inches(10, 8)
 #ax = f.gca()
 #with sns.axes_style("white"):
 # jointplot creates its own figure; fetch it afterwards to resize it and to
 # draw on its first axes.
 sns.jointplot(xm.compressed(), ym.compressed(), kind="hex")
 f = plt.gcf()
 f.set_dpi(100)
 f.set_size_inches(10, 8)
 ax = f.axes[0]
 #print f.axes    
 #ax = f.axes
 
 # histogram2d bins x along the first array axis, while imshow draws the first
 # axis vertically, so transpose H to line it up with ``extent``.
 # 'lower' is the valid spelling of the origin option ('low' is not accepted).
 im = ax.imshow(H.T, cmap=plt.cm.Blues, aspect = 'auto', interpolation='none', origin='lower', extent = extent)
 #ax.scatter(xm, ym, marker = 'o', c = 'g', s = 5, edgecolor = 'none', alpha = 0.1, label = '')
 #ax.contour(H,extent=extent,linewidths=1, interpolation='nearest', origin = 'lower', levels=[16, 50, 84], cmap = plt.cm.YlGn)
 #ax.hist2d(xm.compressed(), ym.compressed(), bins = (10,10))
 
 nBox = 1000
 #nBox = len(xm.compressed()) / 50.
 #slope, intercept, sigma_slope, sigma_intercep = OLS_bisector(logOH_M13, Zneb_mpa)