def plot_joint_distribution_benefits_surplus(model_dict, df):
    """Plot the joint distribution of surplus (x-axis) and benefits (y-axis).

    Benefits are ``Y1 - Y0``.  Costs are the ``model_dict['COST']['all']``
    coefficients applied to the ``Z_0`` and ``Z_1`` columns, plus ``UC``.
    Surplus is benefits minus costs.
    """
    cost_coeffs = model_dict['COST']['all']
    cost_regressors = df[['Z_0', 'Z_1']]

    benefits = df['Y1'] - df['Y0']
    costs = np.dot(cost_coeffs, cost_regressors.T) + df['UC']
    surplus = benefits - costs

    grid = sns.jointplot(surplus, benefits, stat_func=None)
    grid.set_axis_labels('$S$', r'$B$')
def plot_pop_resids(msm, **kwargs):
    """
    Plot residuals between MSM populations and raw counts.

    Parameters
    ----------
    msm : msmbuilder.msm
        MSMBuilder MarkovStateModel
    **kwargs : dict, optional
        Extra arguments to pass to seaborn.jointplot

    Returns
    -------
    ax : seaborn.JointGrid
        The grid returned by ``seaborn.jointplot``; the joint axes are
        available as ``ax.ax_joint``.

    Raises
    ------
    AttributeError
        If ``msm`` has neither ``all_populations_`` nor ``populations_``.
    """
    if hasattr(msm, 'all_populations_'):
        # Average the attribute that was actually tested for, along its
        # first axis.
        msm_pop = msm.all_populations_.mean(0)
    elif hasattr(msm, 'populations_'):
        msm_pop = msm.populations_
    else:
        raise AttributeError(
            "msm has neither 'all_populations_' nor 'populations_'; "
            "has the model been fit?")

    # Row sums of the count matrix, normalised by the total count.
    raw_pop = msm.countsmat_.sum(1) / msm.countsmat_.sum()

    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally, and older releases accept both forms.
    ax = sns.jointplot(x=np.log10(raw_pop), y=np.log10(msm_pop),
                       kind='resid', **kwargs)
    ax.ax_joint.set_xlabel('Raw Populations', size=20)
    ax.ax_joint.set_ylabel('Residuals', size=20)
    return ax
def init_artists(self, ax, plot_data, plot_kwargs):
    """Create the initial seaborn artist for this plot.

    Returns ``{'axis': ...}`` holding the result of ``sns.kdeplot`` drawn on
    ``ax`` when ``self.joint`` is falsy, otherwise ``{'fig': ...}`` holding
    the figure of a new ``sns.jointplot``.  Raises ``Exception`` when a joint
    plot is requested while ``self.subplot`` is truthy.
    """
    if not self.joint:
        return {'axis': sns.kdeplot(*plot_data, ax=ax, **plot_kwargs)}

    if self.subplot:
        raise Exception("Joint plots can't be animated or laid out in a grid.")

    joint_grid = sns.jointplot(*plot_data, **plot_kwargs)
    return {'fig': joint_grid.fig}
def _update_plot(self, axis, view):
    """Redraw the plot for ``view``.

    In joint mode the ``'cmap'`` entry is dropped from ``self.style`` and a
    new jointplot of the first two columns of ``view.data`` is created; its
    figure is stored in ``self.handles['fig']``.  Otherwise a KDE of
    ``view.data`` is drawn on ``axis``, labelled with ``view.label`` only
    when ``self.overlaid == 1``.
    """
    if not self.joint:
        legend_label = ''
        if self.overlaid == 1:
            legend_label = view.label
        sns.kdeplot(view.data, ax=axis, label=legend_label,
                    zorder=self.zorder, **self.style)
        return

    self.style.pop('cmap', None)
    xs, ys = view.data[:, 0], view.data[:, 1]
    joint_grid = sns.jointplot(xs, ys, **self.style)
    self.handles['fig'] = joint_grid.fig
def _update_plot(self, axis, view):
    """Redraw the plot for ``view``.

    In joint mode the ``'cmap'`` entry is dropped from ``self.style`` and a
    new jointplot of the first two columns of ``view.data`` is created; its
    figure is stored in ``self.handles['fig']``.  Otherwise a KDE of
    ``view.data`` is drawn on ``axis`` using the style entry selected by
    ``self.cyclic_index``, with ``view.label`` added as the label when
    ``self.overlaid >= 1`` and the label is non-empty.
    """
    if not self.joint:
        kde_kwargs = self.style[self.cyclic_index]
        if self.overlaid >= 1:
            legend_label = view.label
            if legend_label:
                # NOTE(review): this writes into the object returned by
                # self.style[...]; confirm that it is a per-call copy.
                kde_kwargs['label'] = legend_label
        sns.kdeplot(view.data, ax=axis, zorder=self.zorder, **kde_kwargs)
        return

    self.style.pop('cmap', None)
    xs, ys = view.data[:, 0], view.data[:, 1]
    joint_grid = sns.jointplot(xs, ys, **self.style)
    self.handles['fig'] = joint_grid.fig
def vector_cloud_kde(experiment, kinematic, i=None):
    """Draw KDE joint plots for each pair of a kinematic's x/y/z components.

    Three ``seaborn.jointplot`` figures are created, in the order
    (x, y), (x, z), (y, z), from the columns ``<kinematic>_x``,
    ``<kinematic>_y`` and ``<kinematic>_z`` of the trajectory slice returned
    by ``experiment.flights.get_trajectory_slice(i)``.

    :param experiment: object providing ``is_simulation`` and ``flights``
    :param kinematic: column prefix, e.g. ``'position'`` or ``'velocity'``
    :param i: passed through to ``get_trajectory_slice``
    :raises TypeError: if ``experiment.is_simulation`` is ``False`` and
        ``kinematic`` is not position, velocity or acceleration
    """
    # test whether we are a simulation; if not, forbid plotting of drivers
    if experiment.is_simulation is False:
        if kinematic not in ['position', 'velocity', 'acceleration']:
            raise TypeError("we don't know the mosquito drivers")

    labels = [kinematic + '_' + dim for dim in ('x', 'y', 'z')]
    ensemble = experiment.flights.get_trajectory_slice(i)
    selection = ensemble.loc[:, labels]

    # Compare by value: ``is`` against a string literal only matches when the
    # interpreter happens to share the string object.
    if kinematic == "velocity":
        xlim = ylim = (-0.8, 0.8)
    else:
        xlim = ylim = None

    for x_idx, y_idx in ((0, 1), (0, 2), (1, 2)):
        sns.jointplot(x=labels[x_idx], y=labels[y_idx], data=selection,
                      kind='kde', shade=True, xlim=xlim, ylim=ylim,
                      shade_lowest=False, space=0, stat_func=None)
def plot_distribution(df, experiment_type, color="C0", title=None,
                      target_color="C2", background_shade=-50,
                      crosshair=False, drone_width=None):
    """Plot a KDE joint plot of ``xn``/``yn`` for arrived runs of one type.

    Rows of ``df`` are restricted to ``experiment_type`` and then to
    ``arrived == 1``.  Rug plots are added to the marginals, the target is
    drawn with ``plot_targets``, the axes are set to equal scaling and a
    rectangle coloured via ``change_color(color, saturation=background_shade)``
    is placed behind the plot.  When ``title`` is ``None`` the first
    ``experiment`` value of the selected type is used as the figure title.
    """
    runs = df[df.experiment_type == experiment_type]
    arrived = runs[runs.arrived == 1]
    if title is None:
        title = runs.experiment.iloc[0]

    grid = sns.jointplot(arrived.xn, arrived.yn, kind="kde", space=0,
                         color=color)
    grid.plot_marginals(sns.rugplot, height=0.1, color=color)

    # Send the first child of the joint axes to the back and make any
    # scatter-style collections among the remaining children translucent.
    grid.ax_joint.get_children()[0].set_zorder(-1)
    for artist in grid.ax_joint.get_children()[1:]:
        if isinstance(artist, mpl.collections.PathCollection):
            artist.set_alpha(0.8)

    plt.sca(grid.ax_joint)
    plot_targets(show_start=False, show_final=False, target_coords=[TARGET],
                 target_color=target_color, zorder=0, crosshair=crosshair,
                 drone_width=drone_width)
    grid.set_axis_labels("$x$", "$y$")
    plt.axis("equal")

    xmin, xmax = plt.xlim()
    ymin, ymax = plt.ylim()
    xsize = xmax - xmin
    ysize = ymax - ymin

    # Lower the minimum of the shorter span by half the difference so both
    # spans share the longer length.
    if xsize > ysize:
        ymin -= (xsize - ysize) / 2
        side = xsize
    else:
        xmin -= (ysize - xsize) / 2
        side = ysize

    corner = min(xmin, ymin)
    anchor = corner - abs(corner)
    current_ax = plt.gca()
    backdrop = mpl.patches.Rectangle(
        (anchor, anchor), side * 5, side * 5,
        color=change_color(color, saturation=background_shade), zorder=-1)
    current_ax.add_patch(backdrop)

    grid.fig.suptitle(title)
def plot_pop_resids(msm, **kwargs):
    """
    Plot residuals between MSM populations and raw counts.

    Parameters
    ----------
    msm : object
        Fitted model exposing ``countsmat_`` and either ``all_populations_``
        or ``populations_``.
    **kwargs : dict, optional
        Extra arguments to pass to seaborn.jointplot

    Returns
    -------
    ax : seaborn.JointGrid
        The grid returned by ``seaborn.jointplot``; the joint axes are
        available as ``ax.ax_joint``.

    Raises
    ------
    AttributeError
        If ``msm`` has neither ``all_populations_`` nor ``populations_``.
    """
    if hasattr(msm, 'all_populations_'):
        # Average the attribute that was actually tested for, along its
        # first axis.
        msm_pop = msm.all_populations_.mean(0)
    elif hasattr(msm, 'populations_'):
        msm_pop = msm.populations_
    else:
        raise AttributeError(
            "msm has neither 'all_populations_' nor 'populations_'; "
            "has the model been fit?")

    # Row sums of the count matrix, normalised by the total count.
    raw_pop = msm.countsmat_.sum(1) / msm.countsmat_.sum()

    # x/y are passed by keyword: recent seaborn releases no longer accept
    # them positionally, and older releases accept both forms.
    ax = sns.jointplot(x=np.log10(raw_pop), y=np.log10(msm_pop),
                       kind='resid', **kwargs)
    ax.ax_joint.set_xlabel('Raw Populations', size=20)
    ax.ax_joint.set_ylabel('Residuals', size=20)
    return ax
def jointplot(x=None, y=None, data=None, xlabel=None, ylabel=None,
              color=None, zeromin=True, one2one=True):
    """
    Plots the joint distribution of two variables via seaborn

    Parameters
    ----------
    x, y : array-like or string
        Sequences of values or column names found within ``data``.
    data : pandas DataFrame or None, optional
        An optional DataFrame containing the data.
    xlabel, ylabel : string, optional
        Overrides the default x- and y-axis labels.
    color : matplotlib color, optional
        Color used for the plot elements.
    zeromin : bool, optional
        When True (default), force lower axes limits to 0.
    one2one : bool, optional
        When True (default), plots the 1:1 line on the axis and sets
        the x- and y-axis limits to be equal. The shared lower limit is
        0 when ``zeromin`` is True, otherwise the smaller of the two
        current lower limits.

    Returns
    -------
    jg : seaborn.JointGrid

    """
    jg = seaborn.jointplot(x=x, y=y, color=color, data=data,
                           marginal_kws=dict(rug=True, kde=True))

    # Fall back to the labels seaborn derived from the inputs.
    if xlabel is None:
        xlabel = jg.ax_joint.get_xlabel()
    if ylabel is None:
        ylabel = jg.ax_joint.get_ylabel()
    jg.set_axis_labels(xlabel=xlabel, ylabel=ylabel)

    if zeromin:
        jg.ax_joint.set_xlim(left=0)
        jg.ax_joint.set_ylim(bottom=0)

    if one2one:
        limits = [jg.ax_joint.get_xlim(), jg.ax_joint.get_ylim()]
        ax_limit_max = np.max(limits)
        # Only pin the lower bound to zero when the caller asked for it;
        # otherwise share the smallest existing limit between both axes.
        ax_limit_min = 0 if zeromin else np.min(limits)

        jg.ax_joint.set_xlim(left=ax_limit_min, right=ax_limit_max)
        jg.ax_joint.set_ylim(bottom=ax_limit_min, top=ax_limit_max)
        jg.ax_joint.plot([ax_limit_min, ax_limit_max],
                         [ax_limit_min, ax_limit_max],
                         marker='None', linestyle='-', linewidth=1.75,
                         color=color or 'k', alpha=0.45, label='1:1 line')
        jg.ax_joint.legend(frameon=False, loc='upper left')

    return jg
def graph_regression_numerical(dataset_id, df, col, target):
    """
    display a reg scatter plot graph of col in x axis and target in y axis

    One figure is drawn and saved per theme (dark, then light). Any error
    raised while plotting is logged instead of being propagated.

    :param dataset_id: id of the dataset
    :param df: dataframe, with col and target values
    :param col: name of column
    :param target: name of target column
    :return: None
    """
    try:
        for dark, theme in [(True, 'dark_background'), (False, 'seaborn-whitegrid')]:
            with plt.style.context(theme, after_reset=True):
                g = sns.jointplot(x=col, y=target, data=df, kind="kde", size=7)
                g.plot_joint(plt.scatter, s=5, alpha=0.7)
                # make the first collection on the joint axes fully transparent
                g.ax_joint.collections[0].set_alpha(0)
                # NOTE(review): presumably the 1st-99th percentile range of
                # the values; confirm against __standard_range
                plt.xlim(__standard_range(df[col].values, 1, 99))
                plt.ylim(__standard_range(df[target].values, 1, 99))
                __save_fig(dataset_id, '_col_' + col, dark)
    except Exception:
        # Best-effort plotting: report and carry on, but let
        # KeyboardInterrupt / SystemExit through.
        log.error('error in graph_regression_numerical with dataset_id %s' % dataset_id)
def graph_predict_regression(dataset, round_id, y, y_pred, part='eval'):
    """
    generate a graph prediction versus actuals

    One figure is drawn and saved per theme (dark, then light). Any error
    raised while plotting is logged instead of being propagated.

    :param dataset: dataset object
    :param round_id: id of the round
    :param y: actual values
    :param y_pred: predicted values
    :param part: part of the dataset
    :return: None
    """
    try:
        for dark, theme in [(True, 'dark_background'), (False, 'seaborn-whitegrid')]:
            with plt.style.context(theme, after_reset=True):
                plt.figure(figsize=(6, 6))
                # plot a graph prediction versus actuals
                df = pd.DataFrame([y, y_pred]).transpose()
                df.columns = ['actuals', 'predict']
                g = sns.jointplot(x='actuals', y='predict', data=df, kind="kde", size=6)
                g.plot_joint(plt.scatter, s=5, alpha=0.8)
                # make the first collection on the joint axes fully transparent
                g.ax_joint.collections[0].set_alpha(0)
                # the same range is used on both axes, with a red diagonal
                # marking predict == actuals
                mn, mx = __standard_range(y, 1, 99)
                plt.plot((mn, mx), (mn, mx), color='r', lw=0.7)
                plt.xlim(mn, mx)
                plt.ylim(mn, mx)
                plt.title('%s' % part)
                __save_fig(dataset.dataset_id, 'predict_%s_%s' % (part, round_id), dark)
    except Exception:
        # Best-effort plotting: report and carry on, but let
        # KeyboardInterrupt / SystemExit through.
        log.error('error in graph_predict_regression with dataset_id %s' % dataset.dataset_id)
def plottingData():
    """Plot columns of the tab-separated file named by ``args.data``.

    The plot is selected from the module-level ``args``:

    * ``args.option`` holds two comma-separated column names and
      ``args.pdf`` is falsy: scatter plot of the two columns, coloured by a
      Gaussian kernel density estimate of the points.
    * ``args.option`` holds no comma: that column plotted against the
      ``#Frame`` column.
    * ``args.option`` holds two comma-separated column names and
      ``args.pdf == 'kde'``: seaborn KDE joint plot of the two columns.

    Any other combination draws nothing. Double quotes surrounding the
    column names are removed before the columns are looked up.
    """
    # ``delimiter`` is an alias of ``sep``; pass the separator only once.
    df = pd.read_csv(os.path.join(os.getcwd(), args.data), sep='\t', header=0)
    two_columns = ',' in args.option

    if two_columns and not args.pdf:
        # str.strip returns a new string, so the stripped names must be kept.
        names = [name.strip('"') for name in args.option.split(',')]
        x_name, y_name = names[0], names[1]

        _, ax1 = plt.subplots(nrows=1)

        # Color by the Probability Density Function.
        # Kernel density estimation is a way to estimate
        # the probability density function (PDF) of a random
        # variable in a non-parametric way
        x = df[x_name].values
        y = df[y_name].values

        # Calculate the point density
        xy = np.vstack([x, y])
        z = gaussian_kde(xy)(xy)

        # Sort the points by density, so that the densest points are plotted last
        idx = z.argsort()
        x, y, z = x[idx], y[idx], z[idx]

        # 'none' is the documented spelling for "no marker edge"
        pdf = ax1.scatter(x, y, c=z, s=50, edgecolor='none')
        ax1.set_title(x_name + ' by ' + y_name)

        # Hide right and top spines
        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')

        # Set x and y limits one unit beyond the data range
        plt.xlim(df[x_name].min() - 1, df[x_name].max() + 1)
        plt.ylim(df[y_name].min() - 1, df[y_name].max() + 1)

        # Set x and y labels
        plt.xlabel(x_name)
        plt.ylabel(y_name)

        # Adding the color bar
        colbar = plt.colorbar(pdf)
        colbar.set_label('Probability Density Function')
        plt.show()

    elif not two_columns:
        _, ax1 = plt.subplots(nrows=1)
        ax1.plot(df['#Frame'], df[args.option])
        ax1.set_title(args.option + ' by Time')

        ax1.spines['right'].set_visible(False)
        ax1.spines['top'].set_visible(False)
        ax1.yaxis.set_ticks_position('left')
        ax1.xaxis.set_ticks_position('bottom')

        plt.xlabel('Time (ps)')
        plt.xlim(df['#Frame'].min() - 1, df['#Frame'].max() + 1)
        plt.ylabel(args.option)
        plt.show()

    elif args.pdf == 'kde':
        # Fall back to the regular package when the ``apionly`` entry point
        # is not available in the installed seaborn.
        try:
            import seaborn.apionly as sns
        except ImportError:
            import seaborn as sns
        sns.set(style='white')

        names = [name.strip('"') for name in args.option.split(',')]
        x_name, y_name = names[0], names[1]

        # Distribution plot of two variables using KDE method with seaborn
        sns.jointplot(x=x_name, y=y_name, data=df, kind="kde", space=0, color="b")
        plt.show()
def jointplot(self, x=None, y=None, data=None, *args, **kwargs):
    """
    Draw a seaborn joint plot for every (x, y) pair of columns

    One figure is created per combination of a column in ``y`` and a
    column in ``x``. Rows where either column is NaN or infinite are
    dropped for that pair.

    Parameters
    ----------
    x : a name, or list of names, of variable in data that need to
        visualize their distribution
    y : a name, or list of names, of variable in data that need to
        visualize its joint distribution against every x above
    data : pandas dataframe
    **kwargs : other arguments in seaborn.jointplot
        kind : { 'scatter' | 'reg' | 'resid' | 'kde' | 'hex' }, optional
        stat_func : callable or None, optional
        color : matplotlib color, optional
        ratio : numeric, optional
        space : numeric, optional
        dropna : bool, optional
        {x, y}lim : two-tuples, optional
        {joint, marginal, annot}_kws : dicts, optional
        (``size`` is always taken from ``self.size[0]``)

    Returns
    -------
    None
        The figures are displayed with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``data`` is not a DataFrame, ``x`` or ``y`` is None, or a
        requested column is missing from ``data``.

    References
    ----------
    Seaborn jointplot further documentation
    https://seaborn.pydata.org/generated/seaborn.jointplot.html
    """
    # check data
    if not isinstance(data, pd.DataFrame):
        raise ValueError('data must be pandas dataframe')

    # check x and y; a single name is wrapped into a list
    if x is None:
        raise ValueError('x can NOT be None')
    if not isinstance(x, (list, tuple, np.ndarray, pd.Index)):
        x = [x]
    if y is None:
        raise ValueError('y can NOT be None')
    if not isinstance(y, (list, tuple, np.ndarray, pd.Index)):
        y = [y]

    # no figure configuration needed
    plt.close()

    # iterate thru y, then thru x for each y
    for col_y in y:
        if col_y not in data.columns.values:
            raise ValueError('{} is NOT in data'.format(col_y))
        b = data[col_y]
        # builtin bool: the np.bool alias is gone from current NumPy
        b_not_nan = np.ones(b.shape[0], dtype=bool)
        if np.logical_not(np.isfinite(b)).any():
            logger.warning('RUNTIME WARNING: {} column has inf or nan '
                           ''.format(col_y))
            b = b.replace([-np.inf, np.inf], np.nan)
            # filter
            b_not_nan = np.logical_not(b.isnull())

        for col_x in x:
            # check if col in data
            if col_x not in data.columns.values:
                raise ValueError('{} is NOT in data'.format(col_x))
            a = data[col_x]
            a_not_nan = np.ones(a.shape[0], dtype=bool)
            if np.logical_not(np.isfinite(a)).any():
                logger.warning('RUNTIME WARNING: {} column has inf or '
                               'nan'.format(col_x))
                a = a.replace([-np.inf, np.inf], np.nan)
                # filter
                a_not_nan = np.logical_not(a.isnull())

            # joint filter
            not_nan = b_not_nan & a_not_nan

            joint_grid = sns.jointplot(*args, x=a[not_nan], y=b[not_nan],
                                       size=self.size[0], **kwargs)
            joint_grid.fig.axes[1].set_title(
                label='Joint Distribution of {} and {} '
                      ''.format(col_y, col_x),
                fontsize=self.title_fontsize)
            joint_grid.fig.axes[0].set_xlabel(xlabel=col_x,
                                              fontsize=self.label_fontsize)
            joint_grid.fig.axes[0].set_ylabel(ylabel=col_y,
                                              fontsize=self.label_fontsize)
            # 'major' is the valid selector; 'maj' is not one of
            # 'major' / 'minor' / 'both'
            joint_grid.fig.axes[0].tick_params(
                axis='both', which='major', labelsize=self.tick_fontsize)
            joint_grid.fig.axes[0].legend(loc='upper right')
            joint_grid.fig.subplots_adjust(wspace=0.5, hspace=0.3,
                                           left=0.125, right=0.9,
                                           top=0.9, bottom=0.1)
            joint_grid.fig.tight_layout()

    plt.show()
def plot_joint_distribution_unobservables(df):
    """Plot the joint distribution of the ``V`` and ``U1`` columns of ``df``."""
    v_values, u1_values = df['V'], df['U1']
    grid = sns.jointplot(v_values, u1_values, stat_func=None)
    grid.set_axis_labels('$V$', '$U_1$')
def plot_joint_distribution_potential(df):
    """Plot the joint distribution of the ``Y1`` and ``Y0`` columns of ``df``."""
    y1_values, y0_values = df['Y1'], df['Y0']
    grid = sns.jointplot(y1_values, y0_values, stat_func=None)
    grid.set_axis_labels('$Y_1$', r'$Y_0$')
plt.pie(med_age, labels=med_age.index, autopct='%1.1f%%')
plt.show()

# Pie chart of the 'Sex' value counts in peace_df. The wedges are drawn from
# the same series that supplies the labels, so sizes and labels match.
counts_peace = peace_df['Sex'].value_counts()
plt.pie(counts_peace, labels=counts_peace.index, autopct='%1.1f%%')
plt.show()

# Pie chart of the 'Age Category' value counts in peace_df.
peace_age = peace_df['Age Category'].value_counts()
print(peace_age)
plt.pie(peace_age, labels=peace_age.index, autopct='%1.1f%%')
plt.show()

# Age against year: regression joint plot, box plot per category, and a
# LOWESS-smoothed trend line.
sns.jointplot(x="Year", y="Age", kind='reg', data=data)
plt.show()

sns.boxplot(data=data, x='Category', y='Age')
plt.show()

# x/y are passed by keyword; recent seaborn no longer accepts them positionally.
sns.lmplot(x='Year', y='Age', data=data, lowess=True, aspect=2,
           line_kws={'color': 'black'})
plt.show()

# Question 2: What words are most frequently written in the prize motivation?
top_N = 10
stopwords = nltk.corpus.stopwords.words('english')
# plt.legend(loc = 'best')
# plt.grid()
# f.savefig('BPT.png')
# plt.close(f)
#EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE
#ax.autoscale(False)

# 40x40 2-D histogram of the unmasked x/y values; ``extent`` is the outer
# bin edges in the [left, right, bottom, top] order used by imshow.
H, xedges, yedges = np.histogram2d(xm.compressed(), ym.compressed(), bins = (40,40))
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
#f = plt.figure()
#f.set_dpi(100)
#f.set_size_inches(10, 8)
#ax = f.gca()
#with sns.axes_style("white"):

# x/y are passed by keyword; recent seaborn no longer accepts them positionally.
sns.jointplot(x=xm.compressed(), y=ym.compressed(), kind="hex")

# Resize the figure seaborn just created and take its first axes.
f = plt.gcf()
f.set_dpi(100)
f.set_size_inches(10, 8)
ax = f.axes[0]
#print f.axes
#ax = f.axes

# histogram2d bins x along the first array axis, whereas imshow draws the
# first axis vertically, so the histogram is transposed to put x on the
# horizontal axis. 'lower' is the documented spelling of the origin value.
im = ax.imshow(H.T, cmap=plt.cm.Blues, aspect = 'auto', interpolation='none', origin='lower', extent = extent)
#ax.scatter(xm, ym, marker = 'o', c = 'g', s = 5, edgecolor = 'none', alpha = 0.1, label = '')
#ax.contour(H,extent=extent,linewidths=1, interpolation='nearest', origin = 'lower', levels=[16, 50, 84], cmap = plt.cm.YlGn)
#ax.hist2d(xm.compressed(), ym.compressed(), bins = (10,10))
nBox = 1000
#nBox = len(xm.compressed()) / 50.
#slope, intercept, sigma_slope, sigma_intercep = OLS_bisector(logOH_M13, Zneb_mpa)