def createGraphTEST(folder, inputdata=0):
    """Save a letter-value plot of seaborn's ``brain_networks`` data set.

    The data set is averaged per ``network`` level, the resulting columns
    are ordered by increasing standard deviation, and the plot is written
    to ``result.png`` inside ``folder``.

    Args:
        folder: Directory that receives ``result.png``.
        inputdata: Not used by this function; kept so existing callers
            keep working.
    """
    sns.set(style="whitegrid")
    networks = sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])
    networks = networks.T.groupby(level="network").mean().T
    order = networks.std().sort_values().index

    # seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
    # name; use the new one when it exists, the old one otherwise.
    letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot
    axes = letter_value_plot(data=networks, order=order, scale="linear",
                             palette="mako")

    # os.path.join keeps an empty ``folder`` relative (plain concatenation
    # produced "/result.png") and avoids a doubled separator.
    axes.figure.savefig(os.path.join(folder, "result.png"))
def plot(self, score=None, data=None, bw=0.15, jitter=0.05):
    """Overlay five categorical plots of ``score`` grouped by ``model``.

    Draws, in this order, a letter-value plot, a violin plot, a swarm plot,
    a strip plot and a point plot, then titles the figure with ``score``.

    Args:
        score: Column plotted on the x axis. Nothing is drawn when None.
        data: Data passed to every plot; ``self.results()`` is used when
            None.
        bw: Bandwidth forwarded to the violin plot.
        jitter: Jitter forwarded to the strip plot.
    """
    if score is None:
        return

    frame = self.results() if data is None else data
    per_model = {'y': 'model', 'x': score, 'data': frame}

    sb.lvplot(**per_model)
    sb.violinplot(inner='quartile', bw=bw, **per_model)
    sb.swarmplot(color='w', alpha=0.5, **per_model)
    sb.stripplot(color='w', alpha=0.5, jitter=jitter, **per_model)
    sb.pointplot(color='k', **per_model)
    plt.title(score)
def boxplot_violinplot_lvplot(df):
    """Show three views of ``Award_Amount`` per ``Model Selected``.

    A box plot, a violin plot (husl palette) and a letter-value plot
    (Paired palette, ``Region`` as hue) are drawn one after another; each
    one is shown and the figure is cleared before the next is drawn.

    Args:
        df: Data passed to each seaborn call as ``data``.
    """
    shared = {'data': df, 'x': 'Award_Amount', 'y': 'Model Selected'}
    stages = (
        ('boxplot', {}),
        ('violinplot', {'palette': 'husl'}),
        ('lvplot', {'palette': 'Paired', 'hue': 'Region'}),
    )
    for plot_name, extra in stages:
        # The seaborn function is looked up only when its turn comes.
        getattr(sns, plot_name)(**shared, **extra)
        plt.show()
        plt.clf()
def plot_var_distribution_over_time(df, vintage_var, vars, missing_value=-999.0):
    '''
    Draw one letter-value plot per variable, grouped by vintage.

    INPUT:
        df; dataframe containing observations over a time period
        vintage_var; name of the vintage column in df (described by the
            original author as year-month, YYYY-MM)
        vars; names of the columns whose distribution is plotted
        missing_value; placeholder value that is turned back into NaN
            before plotting
    OUTPUT:
        one figure per entry of vars, shown with plt.show()
    AUTHOR: jonoleson
    '''
    for column in vars:
        subset = df[[vintage_var, column]].copy()
        # Undo the imputation so placeholders do not distort the plot.
        subset.loc[:, column] = subset[column].replace(missing_value, np.nan)
        subset.sort_values(vintage_var, inplace=True)

        figure, axis = plt.subplots(figsize=(10, 8))
        sns.lvplot(x=vintage_var, y=column, data=subset, ax=axis)
        plt.xticks(rotation=70)
        # `font` is not defined in this function; it must exist at module level.
        axis.set_title(column + ' Distribution by Vintage', fontdict=font)
        plt.show()
# bw = 0.8 #控制拟合程度,一般可以不设置 ) plt.show() # 通过hue再分类 sns.violinplot(x='day',y='total_bill',data=tips, hue='smoker',palette='muted', split=True, # 设置是否拆分小提琴图 inner='quartile') plt.show() # 结合散点图 sns.violinplot(x='day',y='total_bill',data=tips,palette='muted',inner=None) sns.swarmplot(x='day',y='total_bill',data=tips,color='w',alpha=0.5) plt.show() ''' # LV图表 - lvplot() ''' tips = pd.read_csv('C:\\Users\刘邦国\PycharmProjects\study\data_visual\\tips.csv') # 绘制LV图 sns.lvplot(x='day',y='total_bill',data=tips,palette='mako', # hue='smoker', width=0.8, # 箱之间的间隔比例 linewidth=12, scale='area', # 设置框的大小 --> 'linear'、'exonential'、'area' k_depth='proportion' # 设置框的数量 --> 'proportion','tukey','trustworthy' ) # 可以添加散点图 sns.swarmplot(x='day',y='total_bill',data=tips,color='k',size=3,alpha=0.8) plt.show() ''' # 统计图----
df_yout['month'] = df_yout['publish_time'].dt.month

print("Category Name count")
print(df_yout['month'].value_counts()[:5])

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

plt.figure(figsize=(14, 9))

# Upper panel: number of rows per publishing month.
plt.subplot(211)
# The column is passed by keyword: newer seaborn releases no longer accept
# it as the first positional argument.
g = sns.countplot(x='month', data=df_yout, palette="Set1")
g.set_xticklabels(g.get_xticklabels(), rotation=45)
g.set_title("Counting Months ", fontsize=20)
g.set_xlabel("Months", fontsize=15)
g.set_ylabel("Count", fontsize=15)

# Lower panel: distribution of like_rate per publishing month.
plt.subplot(212)
g1 = letter_value_plot(x='month', y='like_rate', data=df_yout, palette="Set1")
# Rotate this axis' own labels (the original reused the upper panel's).
g1.set_xticklabels(g1.get_xticklabels(), rotation=45)
g1.set_title("Like Rate by Month", fontsize=20)
g1.set_xlabel("Months", fontsize=15)
g1.set_ylabel("Like Rate(log)", fontsize=15)

plt.subplots_adjust(hspace=0.5, top=0.9)
plt.show()

# ## Let's extract the hour value of the datetime

# In[ ]:

# Separate date and time into two columns from the 'publish_time' column
df_yout.insert(4, 'publish_date', df_yout['publish_time'].dt.date)
sns.heatmap(corr, annot=True)
plt.title('Correlation between variables')

# Pair-plot to show relationships between variables
plt.figure(figsize=(10, 10))
sns.pairplot(dataset, hue='target')
plt.show()

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

# Letter-value plot of every feature against the target.
# ``!=`` is used on purpose: ``x not in 'target'`` is a substring test and
# would also drop columns named e.g. 't' or 'get'.
columns = [x for x in dataset.columns if x != 'target']
length = len(columns)
plt.figure(figsize=(15, 15))
for j, i in enumerate(columns):
    # The 4 x 2 grid has room for at most eight features.
    plt.subplot(4, 2, j + 1)
    letter_value_plot(x=dataset['target'], y=dataset[i])
    plt.title(i)
    plt.axhline(dataset[i].mean(), linestyle='dashed')

# Violin plot of every feature against the target, drawn on the same grid.
for j, i in enumerate(columns):
    plt.subplot(4, 2, j + 1)
    sns.violinplot(x=dataset['target'], y=dataset[i])
    plt.title(i)

# Train-Test Split
X = dataset.drop(columns='target')
Y = dataset['target'].values
health_frame = pd.read_csv("../../build/health/{}-health.csv".format(name)) metric = health_metric(health_frame) avg_props.ix[i, 'health'] = metric.mean() group1 = avg_props[avg_props['cluster'] == 0]['health'] group2 = avg_props[avg_props['cluster'] == 1]['health'] print("Group 1: {} (+/- {}".format(group1.mean(), group1.std())) print("Group 2: {} (+/- {}".format(group2.mean(), group2.std())) statistic, pval = ttest_ind(group1, group2, equal_var=False) print("t-statistic: {}, pvalue: {}".format(statistic, pval)) plt.figure(figsize=(3.5,3)) colors = seaborn.color_palette('magma', n_colors=5)[3:] plot = seaborn.lvplot(data=[group1, group2], palette=colors) seaborn.despine() plot.minorticks_on() plot.tick_params(axis='x', which='minor', bottom='off') plot.set_xlabel('') h_display = health_display[sys.argv[1]] plot.set_ylabel(h_display[0]) if h_display[1] is not None: plot.set_ylim(h_display[1]) labels = ["Cluster {}".format(i) for i in range(1, 3)] plot.set_xticklabels(labels) plt.tight_layout() plt.savefig("{}.pdf".format(sys.argv[1])) plt.close() for i, centroid in enumerate(clusterer.cluster_centers_):
.toPandas() # A bar plot: sns.barplot(x="rider_student", y = "distance", data=sample_pdf) # A point plot: sns.pointplot(x="rider_student", y = "distance", data=sample_pdf) # A strip plot: sns.stripplot(x="rider_student", y="distance", data=sample_pdf, jitter=True) # A swarm plot: sns.swarmplot(x="rider_student", y="distance", data=sample_pdf) # A letter value plot: sns.lvplot(x="rider_student", y="distance", data=sample_pdf) # A box plot: sns.boxplot(x="rider_student", y="distance", data=sample_pdf) # A violin plot: sns.violinplot(x="rider_student", y="distance", data=sample_pdf) # ## Exploring more than two variables # There are numerous ways to explore more than two variables. The appropriate # table or plot depends on the variable types and particular question you are # trying to answer. We highlight a few common approaches below. # ### N-way summary tables
kde=False) ax = ax.set_title('ILP') ax = sns.distplot(sdss_df[sdss_df['Category_list'] == 'Annuity'].totalpremium, bins=30, ax=axes[3], kde=False) ax = ax.set_title('Annuity') ax = sns.distplot(sdss_df[sdss_df['Category_list'] == 'Others'].totalpremium, bins=30, ax=axes[4], kde=False) ax = ax.set_title('Others') fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 4)) ax = sns.lvplot(x=sdss_df['Category_list'], y=sdss_df['Category_count'], palette='coolwarm') ax.set_title('Category_count') fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 4)) ax = sns.lvplot(x=sdss_df['Category_list'], y=sdss_df['PH_AgeGroup'], palette='coolwarm') ax.set_title('PH_AgeGroup') fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(16, 4)) ax = sns.lvplot(x=sdss_df['Category_list'], y=sdss_df['addr_type_desc'], palette='coolwarm') ax.set_title('addr_type_desc')
"""
Plotting large distributions
============================

"""
import seaborn as sns

sns.set(style="whitegrid")

# Average the measurements per network and order the networks by spread.
networks = sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])
networks = networks.T.groupby(level="network").mean().T
order = networks.std().sort_values().index

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot
letter_value_plot(data=networks, order=order, scale="linear", palette="mako")
total_user.append(int(row[3])) print 'Maximum Age:', max(active_age), 'Minimum Age:', min(active_age) print 'Maximum User:'******'Minimum User:'******'Maximum Post:', max(total_post), 'Minimum Post:', min(total_post) cmap = matplotlib.cm.get_cmap('Greys') rgb_1 = cmap(0.3) rgb_2 = cmap(0.45) rgb_3 = cmap(0.6) #Plotting by seaborn f, (ax1, ax2, ax3) = plt.subplots(nrows=1, ncols=3) sns.lvplot(y=active_age, ax=ax1, color=rgb_1) sns.lvplot(y=total_user, ax=ax2, color=rgb_2) sns.lvplot(y=total_post, ax=ax3, color=rgb_3) ax1.set(xlabel='# of Months') ax2.set(xlabel='# of Users') ax3.set(xlabel='# of Posts') ax1.set_yscale('log') ax1.minorticks_off() ax1.yaxis.set_ticks(np.logspace(1, 2, 2)) ax2.set_yscale('log') ax2.minorticks_off() ax2.yaxis.set_ticks(np.logspace(3, 6, 4))
# Sort every contribution into one of five equal-width age bins (keys 1..5).
if len(age_list) > 0:
    # Loop invariants: the original recomputed min/max for every element.
    youngest = min(age_list)
    age_span = max(age_list) + 1 - youngest
    for i, age in enumerate(age_list):
        # Floor division keeps the bin id an integer on Python 2 and 3 alike;
        # plain "/" yields a float key on Python 3.
        bin_id = (age - youngest) * 5 // age_span + 1
        age_wise_contribution_bins[bin_id].append(contribution_list[i])

#Plotting by seaborn
cmap = matplotlib.cm.get_cmap('Greys')
rgb_1 = cmap(0.3)
rgb_2 = cmap(0.37)
rgb_3 = cmap(0.44)
rgb_4 = cmap(0.51)
rgb_5 = cmap(0.58)

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

f, (ax1, ax2, ax3, ax4, ax5) = plt.subplots(nrows=1, ncols=5, sharey=True)
panels = (
    (1, ax1, rgb_1),
    (2, ax2, rgb_2),
    (3, ax3, rgb_3),
    (4, ax4, rgb_4),
    (5, ax5, rgb_5),
)
for panel_bin, panel_axis, panel_color in panels:
    letter_value_plot(y=age_wise_contribution_bins[panel_bin], ax=panel_axis,
                      color=panel_color)

ax3.set_title('ANDROID')
ax1.set(xlabel='1 - 13')
ax2.set(xlabel='14 - 26')
ax3.set(xlabel='27 - 39\n # of Months')
ax4.set(xlabel='40 - 52')
ax5.set(xlabel='53 - 65')
ax1.set(ylabel='# of Answers')
data=tips, hue='smoker', palette='muted', split=True, #设置是否拆分小提琴图 inner='quartile') #2.violinplot() #结合散点图 sns.violinplot(x='day', y='total_bill', data=tips, palette='hls', inner=None) sns.swarmplot(x='day', y='total_bill', data=tips, color='w', alpha=.5) #3.lvplot() #LV图表 sns.lvplot( x='day', y='total_bill', data=tips, palette='mako', #hue = 'smoker', width=0.8, #箱之间间隔比例 linewidth=12, scale='area', #设置框的大小 -> 'linear','exonential','area' k_depth='proportion', #设置框的数量 -> 'proportion','tukey','trustworthy' ) #绘制LV图 sns.swarmplot(x='day', y='total_bill', data=tips, color='k', alpha=0.8) #可以添加散点图
# Vertical Bars
sns.set(style="whitegrid")

# Average the measurements per network and order the networks by spread.
networks = sns.load_dataset("brain_networks", index_col=0, header=[0, 1, 2])
networks = networks.T.groupby(level="network").mean().T
order = networks.std().sort_values().index

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot
letter_value_plot(data=networks, order=order, scale="linear", palette="mako")

# Cool Hexbin
import numpy as np
from scipy.stats import kendalltau
import seaborn as sns

sns.set(style="ticks")

# Fixed seed so the sample is reproducible.
rs = np.random.RandomState(11)
x = rs.gamma(2, size=1000)
def plot_neuron_activity(activations, num_neuron_samples: int, mode="lv", color=None, neuron_samples=None, save=None):
    """
    Plot the activation values for a certain number of randomly sampled neurons over all samples.

    :param activations: Iterable of per-data-point arrays; each is iterated
        row by row and every row is indexed with the sampled neuron indices.
        ``activations[0].shape[1]`` is read as the number of neurons when
        the neurons have to be sampled.
    :param num_neuron_samples: Number of neurons to sample. Replaced by
        ``len(neuron_samples)`` when explicit indices are given.
    :param mode: "lv" for a letter-value plot, "boxes" for a box plot.
    :param color: Face color of the boxes in "boxes" mode ("tab:blue" when
        None). Not used in "lv" mode.
    :param neuron_samples: Explicit neuron indices to plot instead of a
        random sample.
    :param save: Path the figure is saved to; the figure is shown instead
        when None.
    """
    assert mode in (
        "lv", "boxes"
    ), "Mode has to be either 'lv' or 'boxes, '{}' found.".format(mode)

    if neuron_samples is None:
        num_neurons = activations[0].shape[1]
        sampled_neurons = sample(range(num_neurons), k=num_neuron_samples)
    else:
        sampled_neurons = neuron_samples
        # Explicit indices decide how many rows exist; a mismatching count
        # used to make the concatenation below fail.
        num_neuron_samples = len(sampled_neurons)

    # Aggregate activations for sample neurons. Columns are collected first
    # and joined once; concatenating inside the loop copied the whole matrix
    # for every time step. The empty float seed keeps the original dtype
    # promotion and the result for empty input.
    collected = [np.empty((num_neuron_samples, 0))]
    for data_point in activations:
        for time_step in data_point:
            collected.append(time_step[sampled_neurons].reshape(-1, 1))
    activations_per_neuron = np.concatenate(collected, axis=1)

    # Create LV plot
    if mode == "lv":
        # Long-form frame: one row per (neuron, activation value) pair.
        rows, columns = activations_per_neuron.shape
        data = pd.DataFrame(
            data={
                "activation": activations_per_neuron.reshape(rows * columns, ),
                "neuron": np.repeat(np.arange(1, rows + 1), columns),
            })

        # seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the
        # old name; use the new one when it exists, the old one otherwise.
        letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot
        ax = letter_value_plot(x="neuron",
                               y="activation",
                               scale="linear",
                               data=data,
                               width=1,
                               palette=sns.color_palette(
                                   "viridis", n_colors=num_neuron_samples))

    # Create box and whiskers plot
    elif mode == "boxes":
        fig, axis = plt.subplots(1, 1)
        ax = axis

        # Sort neurons by the width of their inter-quartile box for better
        # readability.
        def _box_size(data):
            q1, med, q3 = np.percentile(data, [25, 50, 75])
            return np.abs(q1 - med) + np.abs(q3 - med)

        activations_per_neuron = np.array_split(activations_per_neuron,
                                                num_neuron_samples,
                                                axis=0)
        activations_per_neuron = sorted(activations_per_neuron, key=_box_size)

        # A very high ``whis`` makes the whiskers span min and max.
        # NOTE(review): newer matplotlib releases appear to call the last
        # keyword ``manage_ticks`` - confirm against the pinned version.
        bplot = ax.boxplot([
            activations_per_neuron[i].squeeze()
            for i in range(num_neuron_samples)
        ],
                           vert=True,
                           sym="",
                           patch_artist=True,
                           whis=10000,
                           notch=True,
                           manage_xticks=False)

        # Coloring
        face_color = "tab:blue" if color is None else color
        for patch in bplot["boxes"]:
            patch.set_facecolor(face_color)

    # Roughly ten ticks; the step is clamped to 1 because fewer than ten
    # neurons used to give range() a zero step and raise ValueError.
    tick_step = max(1, num_neuron_samples // 10)
    ax.set_xticks(list(range(1, num_neuron_samples + 1, tick_step)))

    if save is None:
        plt.show()
    else:
        plt.savefig(save, bbox_inches="tight")
        plt.close()
def plotDrawOk(self):
    """Open a new window and draw the plot chosen in the combo boxes.

    Reads the four combo boxes into ``self.selection`` (first column),
    ``self.selection2`` (second column), ``self.selection3`` (plot kind)
    and ``self.selectionLeft`` (value compared against ``df.left``), creates
    a ``Toplevel`` window on ``self.plot_frame`` and, for the matching plot
    kind, creates a figure, embeds it in the window and draws into it.

    ``df`` is not defined in this method; it must exist in an enclosing
    scope (presumably a module-level DataFrame - verify).

    NOTE(review): the nesting below was reconstructed from a source whose
    indentation was lost. The placement of ``ax.set(ylabel="Percent")`` and
    of the final ``plt.tight_layout()`` / ``plt.gcf().clear()`` should be
    confirmed against the original file.
    """
    self.selection = self.combobox.get()
    self.selection2 = self.combobox2.get()
    self.selection3 = self.combobox3.get()
    self.selectionLeft = self.comboboxLeft.get()
    window = Toplevel(self.plot_frame)

    # Scatter of the two selected columns, restricted to rows whose `left`
    # value equals the selected one.
    if self.selection3 == 'plot':
        x = df[self.selection]
        y = df[self.selection2]
        self.fig, self.ax = plt.subplots(figsize=(8, 8))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        plt.plot(x[df.left == int(self.selectionLeft)], y[df.left == int(self.selectionLeft)], 'o', alpha=0.1)
        plt.ylabel(self.selection2)
        plt.title('Employees who left')
        plt.xlabel(self.selection)
        self.canvas.draw()

    # Count plot of the first column; split by `left` when the check box is set.
    if self.selection3 == 'countplot':
        self.fig, self.ax = plt.subplots(figsize=(15, 5))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        if self.checkCmd.get() == 1:
            sns.countplot(y=self.selection, hue='left', data=df).set_title('Employee ' + self.selection + ' Turnover Distribution')
        if self.checkCmd.get() == 0:
            sns.countplot(y=self.selection, data=df).set_title('Employee ' + self.selection + ' Turnover Distribution')

    # Bar plot of the second column per value of the first.
    if self.selection3 == 'barplot':
        self.fig, self.ax = plt.subplots(figsize=(15, 5))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        if self.checkCmd.get() == 1:
            sns.barplot(df[self.selection], df[self.selection2], hue=df.left)
        if self.checkCmd.get() == 0:
            sns.barplot(df[self.selection], df[self.selection2])

    # Bar plot whose estimator turns each group's size into a percentage of
    # all rows of df.
    if self.selection3 == 'barplotPercentage':
        self.fig, self.ax = plt.subplots(figsize=(15, 5))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        if self.checkCmd.get() == 1:
            ax = sns.barplot(x=self.selection, y=self.selection, hue="left", data=df, estimator=lambda x: len(x) / len(df) * 100)
        if self.checkCmd.get() == 0:
            ax = sns.barplot(x=self.selection, y=self.selection, data=df, estimator=lambda x: len(x) / len(df) * 100)
        # `ax` is only bound when checkCmd is 0 or 1.
        ax.set(ylabel="Percent")

    # Two density curves of the first column: rows with left == 0 in blue,
    # rows with left == 1 in red.
    if self.selection3 == 'kdeplot':
        self.fig, self.ax = plt.subplots(figsize=(15, 5))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        ax = sns.kdeplot(df.loc[(df['left'] == 0), self.selection], color='b', shade=True, label='no turnover')
        ax = sns.kdeplot(df.loc[(df['left'] == 1), self.selection], color='r', shade=True, label='turnover')
        plt.title('Employee ' + self.selection + ' Distribution - Turnover V.S. No Turnover')

    # Histogram with density estimate of the first column.
    if self.selection3 == 'distplot':
        self.fig, self.ax = plt.subplots(figsize=(7, 7))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        plt.xlabel(self.selection, fontsize=12)
        plt.ylabel('distribution', fontsize=12)
        sns.distplot(df[self.selection], kde=True)

    # The next three kinds plot the second column against the first.
    if self.selection3 == 'stripplot':
        self.fig, self.ax = plt.subplots(figsize=(7, 7))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        sns.stripplot(df[self.selection], df[self.selection2])
    if self.selection3 == 'pointplot':
        self.fig, self.ax = plt.subplots(figsize=(7, 7))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        sns.pointplot(df[self.selection], df[self.selection2])
    if self.selection3 == 'lvplot':
        self.fig, self.ax = plt.subplots(figsize=(7, 7))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        sns.lvplot(df[self.selection], df[self.selection2])

    # Box-kind factor plot directed at the embedded axes.
    if self.selection3 == 'factorplot':
        self.fig, self.ax = plt.subplots(figsize=(7, 7))
        self.canvas = FigureCanvasTkAgg(self.fig, window)
        self.canvas.get_tk_widget().pack(side='bottom', fill='both', expand=1)
        sns.factorplot(y=self.selection2, x=self.selection, data=df, kind="box", ax=self.ax)
        # NOTE(review): presumably these act on an extra figure opened by
        # factorplot rather than on self.fig - confirm.
        plt.tight_layout()
        plt.gcf().clear()
ransac.fit(np.array(user_count).reshape(-1, 1), np.array(answerer_count).reshape(-1, 1)) inlier_mask = ransac.inlier_mask_ r_answerer.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(answerer_count).reshape(-1, 1)[inlier_mask])) ransac = linear_model.RANSACRegressor() ransac.fit(np.array(user_count).reshape(-1, 1), np.array(commenter_count).reshape(-1, 1)) inlier_mask = ransac.inlier_mask_ r_commenter.append(ransac.score(np.array(user_count).reshape(-1, 1)[inlier_mask], np.array(commenter_count).reshape(-1, 1)[inlier_mask])) current_site = row[0] asker_count[:] = [] answerer_count[:] = [] commenter_count[:] = [] user_count[:] = [] asker_count.append(int(row[6])) answerer_count.append(int(row[7])) commenter_count.append(int(row[12])) user_count.append(int(row[14])) df = pd.DataFrame({'Asker': r_asker, 'Answerer': r_answerer, 'Commenter': r_commenter}) ax = sns.lvplot(data = df, palette=sns.mpl_palette("gist_yarg")) ax.set(ylabel='Coeff. of Determination, $R^2$') ax.set_yticks(np.arange(0.0, 1.0, 0.05), minor=True) sns.despine(offset = 10, trim=True, bottom = True) sns.plt.tight_layout() plt.savefig('User_to_Roles_R_Squared_LV.pdf')
ax=ax1) ax1.set_xticklabels(ax1.get_xticklabels(),rotation=90) ax1.set_title("bedrooms counting", fontsize=15) ax1.set_xlabel("Bathrooms number") ax1.set_ylabel("count") ax2 = plt.subplot(222) ax2 = sns.regplot(x="bedrooms", y='price', data=df_usa, ax=ax2, x_jitter=True) ax2.set_xticklabels(ax2.get_xticklabels(),rotation=90) ax2.set_title("Bedrooms distribution price", fontsize=15) ax2.set_xlabel("Bedrooms number") ax2.set_ylabel("log Price(US)") ax0 = plt.subplot(212) ax0 = sns.lvplot(x="bedrooms", y="price", data=df_usa) ax0.set_title("Better understaning price", fontsize=15) ax0.set_xlabel("Bedrooms") ax0.set_ylabel("log Price(US)") ax0.set_xticklabels(ax0.get_xticklabels(),rotation=90) plt.show() # In[ ]: print("Floors counting description") print(df_usa['floors'].value_counts())
#1. stripplot, swarmplot : plots of each observation #2. boxplot, violinplot, lvplot: abstract representation #3. barplot, pointplot, countplot: statistical estimates #stripplot sns.striplot(data=df, y="DRG Definition", x="Average Covered Charges", jitter=True) #swarmplot sns.swarmplot(data=df, y="DRG Definition", x="Average Covered Charges") #boxplot sns.boxplot(data=df, y="DRG Definition", x="Average Covered Charges") #violinplot sns.violinplot(data=df, y="DRG Definition", x="Average Covered Charges") #lvplot:letter value plot for large datasets sns.lvplot(data=df, y="DRG Definition", x="Average Covered Charges") #barplot sns.barplot(data=df, y="DRG Definition", x="Average Covered Charges", hue="Region") #pointplot sns.pointplot(data=df, y="DRG Definition", x="Average Covered Charges", hue="Region") #countplot sns.countplot(data=df, y="DRG_Code", hue="Region") #ex.1 #part 1 # Create the stripplot sns.stripplot(data=df, x='Award_Amount',
# Every plot below shows "Average Covered Charges" per "DRG Definition".
charges_by_drg = dict(data=df, y="DRG Definition", x="Average Covered Charges")

# Boxplot - shows several measures of the distribution, including the
# median, the upper and lower quartiles, and outliers
sns.boxplot(**charges_by_drg)

# Violinplot - combination of a kernel density plot and a boxplot; gives an
# alternative view of the distribution
sns.violinplot(**charges_by_drg)
# Because it uses a kernel density function it does not show all datapoints
# Useful for large datasets, but can be computationally intensive to create

# lvplot - letter value plot
sns.lvplot(**charges_by_drg)
# API is the same as for boxplot and violinplot
# Hybrid between a boxplot and a violinplot
# Relatively quick to render and easy to interpret

# Barplot - shows an estimate of the value and a confidence interval
sns.barplot(hue="Region", **charges_by_drg)

# Pointplot - similar to barplot, shows a summary measure and confidence interval
# Can be useful for observing how values change across categorical values
import seaborn as sns

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

# The column is passed as ``x`` explicitly: older releases took it as the
# first positional argument, newer ones treat the first positional as ``data``.
letter_value_plot(x=my_df["col_0"])
# The first three plots show Award_Amount per selected model.
award_by_model = dict(data=df, x='Award_Amount', y='Model Selected')

# Box plot
sns.boxplot(**award_by_model)
plt.show()
plt.clf()

# Violin plot with the husl palette
sns.violinplot(palette='husl', **award_by_model)
plt.show()
plt.clf()

# Letter-value plot with the Paired palette and the Region column as the hue
sns.lvplot(palette='Paired', hue='Region', **award_by_model)
plt.show()
plt.clf()

# Count plot of the models used, each region in a different color
sns.countplot(data=df, y="Model Selected", hue="Region")
plt.show()
plt.clf()

# Point plot; capsize draws caps on the confidence-interval bars
sns.pointplot(data=df, y='Award_Amount', x='Model Selected', capsize=.1)
# Number of trips per start station, most used first.
# ``size().to_frame`` replaces the dict-renaming ``agg({'Count': np.size})``,
# which newer pandas releases reject on a grouped Series.
StartSorted = SD_2016.groupby(
    by=['start_station_code'])['start_date'].size().to_frame('Count')
StartSorted['Count'] = StartSorted.Count.astype(int)
StartSorted = StartSorted.sort_values(by='Count', ascending=False)
StartSorted.head()

### 2.1.3. Let's plot the usage duration distribution and total usage times
f, axes = plt.subplots(2, 1, figsize=(20, 10))

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

# Upper panel: trip duration for the first 20 stations of StartUsedSorted
# (defined outside this excerpt).
plt.sca(axes[0])
TopStartUsedStation = np.array(StartUsedSorted.head(20).index)
TopStartUsedStationData = SD_2016[SD_2016['start_station_code'].isin(
    TopStartUsedStation)]
letter_value_plot(data=TopStartUsedStationData,
                  x='start_station_code',
                  y='duration_sec',
                  order=TopStartUsedStation,
                  palette="ocean")
plt.title('The longest useage duration of start station in 2016', fontsize=18)

# Lower panel: number of trips for the 20 most used start stations.
plt.sca(axes[1])
TopStartUsed = np.array(StartSorted.head(20).index)
TopStartUsedData = SD_2016[SD_2016['start_station_code'].isin(TopStartUsed)]
sns.countplot(data=TopStartUsedData,
              x='start_station_code',
              order=TopStartUsed,
              palette="ocean")
plt.title('The most useage times of start station in 2016', fontsize=18)
plt.show()

### 2.1.4. Let's see which station is the least used based on total used duration
import seaborn as sns
import matplotlib.pyplot as plt

# load data
tips = sns.load_dataset("tips")

# common style
sns.set_style("whitegrid")

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

##
## Example 1
## =========
ax = letter_value_plot(x=tips["total_bill"])

# save and show the picture
plt.savefig('lvplot_1.png')
plt.show()

##
## Example 2
## =========
ax = letter_value_plot(x="day", y="total_bill", data=tips)

# save and show the picture
plt.savefig('lvplot_2.png')
plt.show()

##
## Example 3
# Most plots below show Clustering per Category, colored by Category.
by_category = dict(x="Category", y="Clustering", hue="Category", data=df)

ax = sns.pointplot(**by_category)
plt.show(block=True)

sns.pairplot(data=df, hue="Category")
plt.show(block=True)

sns.violinplot(inner="quart", **by_category)
plt.show()

sns.lvplot(linewidth=2.5, **by_category)
plt.show(block=True)

sns.swarmplot(**by_category)
plt.show(block=True)

# Regression of Clustering on Density over all rows.
sns.regplot(x="Density", y="Clustering", data=df)
plt.show(block=True)

# sns.lmplot(x="Density", y="Clustering", hue="Category", truncate=True, size=5, data=df)
# plt.show(block=True)
# sns.lmplot(x="Density", y="Clustering", hue="Category", size=5, data=df)
# plt.show(block=True)
fig, ax = plt.subplots(3, 1, figsize=(14, 10))

# Shared category order for the two grade-based panels.
grade_order = ["A", 'B', 'C', 'D', 'E', 'F', 'G']

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

sns.boxplot(x="grade", y="loan_amnt", data=df_loan,
            palette="hls", ax=ax[0], hue="application_type",
            order=grade_order)
sns.violinplot(x='grade', y="int_rate", data=df_loan,
               hue="application_type", palette="hls", ax=ax[1],
               order=grade_order)
letter_value_plot(x="sub_grade", y="loan_amnt", data=df_loan,
                  palette="hls", ax=ax[2])

plt.show()

# We can clearly see different patterns between Individual and Joint applications

# <h1>Let's look at the Employment title Distribution </h1>

# In[ ]:

# First plot
# Count the titles once; the original recomputed value_counts() for x, y
# and the marker colors.
top_emp_titles = df_loan.emp_title.value_counts()[:40]
trace0 = go.Bar(
    x=top_emp_titles.index.values,
    y=top_emp_titles.values,
    marker=dict(color=top_emp_titles.values),
)
createProfilesBoxplot(profileData=x,
                      plotId="8.11.32.top",
                      namedProfiles=biasProfiles,
                      highlightProfiles=tuple(top),
                      title="Top 10 (100-300nt)")
createProfilesBoxplot(profileData=x,
                      plotId="8.11.32.bottom",
                      namedProfiles=biasProfiles,
                      highlightProfiles=tuple(bottom),
                      title="Bottom 10 (100-300nt)")

fig, ax1 = plt.subplots(figsize=(8, 2.2))

# seaborn renamed ``lvplot`` to ``boxenplot`` and later dropped the old
# name; use the new one when it exists, the old one otherwise.
letter_value_plot = getattr(sns, "boxenplot", None) or sns.lvplot

# Plot density using KDE
#sns.kdeplot( plotdata[:,:,0].flatten(), plotdata[:,:,1].flatten(), n_levels=10, bw=0.65, cmap="Blues", shade=True, shade_lowest=True, legend=True, ax=ax1 )
letter_value_plot(data=x, ax=ax1, color="#50d080")
#sns.boxplot(data=x, color="#50d080", ax=ax1, boxprops={'zorder': 10})

# Plot neutral line
plt.plot([-1, v], [0, 0], '-k')

# Plot window width
plt.plot([0.5, 0.5 + 40 / profileStep], [-1.9, -1.9], '-r', linewidth=4)
# The annotation text is passed positionally: matplotlib renamed the ``s``
# keyword, while the positional form works on every version.
plt.annotate("window", xy=(1.0, -1.83))

# Annotate number of items
plt.annotate('n = %d' % u, xy=(26.0, 1.8))

plt.xlim([-0.5, v - 0.5])
plt.ylim([-2, 2.5])
plt.title("lvplot")