def plot_multimodal_bars(PP):
    """Draw and save the log-likelihood bar chart for the multimodal models.

    Bars show ``logLikelihood`` per ``perception`` level (ordered pool1,
    conv42, fc6 and relabelled early/mid/high), split by ``pragmatics``.
    The figure is written to ``./plots/loglikelihood_models_multimodal.pdf``.

    Args:
        PP: Long-form data handed to ``sns.catplot``; it must provide the
            ``perception``, ``logLikelihood`` and ``pragmatics`` columns.
    """
    sns.catplot(data=PP, x='perception', y='logLikelihood',
                hue='pragmatics', kind='bar',
                order=['pool1', 'conv42', 'fc6'],
                palette='Paired', legend=False, ci=None)
    plt.ylabel('log likelihood')
    # Swap the raw layer names on the x axis for readable stage names.
    # The (locs, labels) return value was never used, so it is not bound.
    plt.xticks([0, 1, 2], ['early', 'mid', 'high'], fontsize=14)
    plt.xlabel('visual features')
    # plt.ylim([-3000,0])
    plt.tight_layout()
    plt.savefig('./plots/loglikelihood_models_multimodal.pdf')
def plot_human_bars(PP):
    """Draw and save the log-likelihood bar chart for the human-sketch models.

    Bars show ``logLikelihood`` per ``pragmatics`` level (ordered S0,
    combined and relabelled insensitive/sensitive), split by ``production``
    (nocost, cost). The figure is written to
    ``./plots/loglikelihood_models_human.pdf``.

    Args:
        PP: Long-form data handed to ``sns.catplot``; it must provide the
            ``pragmatics``, ``logLikelihood`` and ``production`` columns.
    """
    sns.catplot(data=PP, x='pragmatics', y='logLikelihood',
                hue='production', kind='bar',
                order=['S0', 'combined'],
                hue_order=['nocost', 'cost'],
                palette='Paired', legend=False, ci=None)
    plt.ylabel('log likelihood')
    # Swap the model codes on the x axis for readable context labels.
    # The (locs, labels) return value was never used, so it is not bound.
    plt.xticks([0, 1], ['insensitive', 'sensitive'], fontsize=14)
    plt.xlabel('context')
    # plt.ylim([-3000,0])
    plt.tight_layout()
    plt.savefig('./plots/loglikelihood_models_human.pdf')
## classical hypothesis test p = scipy.stats.binom_test(p1*n1, n=n1, p=1/32, alternative='two-sided') print 'Closer proportion diff from chance? p = {}'.format(p) p = scipy.stats.binom_test(p2*n2, n=n2, p=1/32, alternative='two-sided') print 'Further proportion diff from chance? p = {}'.format(p) ##### MAKE PLOTS AND SAVE OUT ## plot recognition accuracy by condition sns.set_context('poster') fig = plt.figure(figsize=(4,4)) redgld=[(0.8, 0.2, 0.2),(0.9, 0.7, 0.3)] sns.catplot(y='correct', x='target_category', hue='condition', hue_order=['closer','further'], order=['bird','car','chair','dog'], data=X,kind='bar',palette=redgld) plt.ylim([0,1]) plt.ylabel('proportion correct') plt.xlabel('category') h = plt.axhline(1/32,linestyle='dashed',color='black') plt.savefig('./plots/accuracy_by_category_and_condition.pdf') plt.close(fig) ## plot recognition accuracy by condition plt.figure(figsize=(2,4)) # sns.set_context('poster') redgld=[(0.8, 0.2, 0.2),(0.9, 0.7, 0.3)] sns.catplot(y='correct', x='condition',
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the residue measurements from the Excel workbook.
df = pd.read_excel('./residue/无.xlsx')

# One box per "class" value, summarising the "Residue" column.
sns.catplot(
    data=df,
    kind="box",
    x="class",
    y="Residue",
)
plt.show()
# Add a '<metric>_mean' column (np.mean applied to each cell) for every
# metric whose name does not contain 'mask'.
for metric in metrics:
    if 'mask' in metric:
        continue
    df[metric + '_mean'] = df[metric].apply(np.mean)

# Two figure-level plots per non-mask metric.
for metric in metrics:
    if 'mask' not in metric:
        mean_column = metric + '_mean'
        # plt.figure()
        # ax = sns.swarmplot(data=df, x='site', y=metric + '_mean', hue='fake',
        #                    linewidth=.9, edgecolor="black", size=6, dodge=True)
        # Swarm of the per-row means by site, one facet per 'fake' value.
        g = sns.catplot(data=df, x='site', y=mean_column, hue='fold',
                        col='fake', kind='swarm', linewidth=.9,
                        dodge=False, sharey=False)
        # Point plot of the same means by 'fake', one line per 'real_name'.
        d = sns.catplot(data=df, x='fake', y=mean_column,
                        hue='real_name', kind='point')
        #plt.xticks([sites[x] for x in df['site'].values], rotation=90)
        plt.title(metric)
# for metric in metrics:
#     if 'mask' in metric:
# Fixed the misspelled axis label ("Observaitons").
ax.set_ylabel("Number of Observations")
# Rotate the tick labels so they stay readable when they would otherwise overlap.
ax.set_xticklabels(df.Year, rotation=90)
# Save before showing: with some backends the figure is torn down once the
# plt.show() window is closed.
fig.savefig("Year.png")
plt.show()

"""**Seaborn** > AxesSubplot > FaceGrid """

sns.set_palette("RdBu")

# Axes-level count plot.
sns.countplot(x="Year", data=df)
plt.show()

# Figure-level equivalent (returns a FacetGrid).
sns.catplot(x="Year", aspect=3, data=df, kind="count")
plt.show()

# Same plot again, keeping the FacetGrid so a figure title can be added.
g = sns.catplot(x="Year", aspect=3, data=df, kind="count")
g.fig.suptitle("Year Counts", y=1)
plt.xticks(rotation=90)
plt.show()

# Axes-level scatter plot, coloured by "New".
sns.scatterplot(x="Year", y="Selling_Price", data=df, hue="New")
plt.show()

# Figure-level equivalent.
sns.relplot(x="Year", y="Selling_Price", data=df, hue="New", kind="scatter")
plt.show()

"""**LINE PLOT**"""
# Keep only the control rows whose 'composto' value is listed in cod_dados_controle.
top_dados_controle = dados_controle.query('composto in @cod_dados_controle')
# First 100 entries of the value_counts() index for the drug rows, i.e. the
# most frequent compound codes, then the drug rows restricted to those codes.
cod_dados_droga = dados_droga['composto'].value_counts().index[0:100]
top_dados_droga = dados_droga.query('composto in @cod_dados_droga')
# Bare expressions below have no effect in a script; presumably they are
# notebook cells used to display the intermediate results.
cod_dados_controle
top_dados_controle
cod_dados_droga
top_dados_droga
dados_controle['composto'].unique()
# Row counts per compound, one facet per dose (control rows, then drug rows).
plot1 = sns.catplot(x='composto',data=top_dados_controle,col="dose",kind="count",col_wrap=2 ,height=4, aspect=.7)
plot2 = sns.catplot(x='composto',data=top_dados_droga,col="dose",kind="count",height=7, aspect=.8)
"""#### - achar o composto em 'cacb2b860' dados droga"""
# Check whether compound 'cacb2b860' also occurs among the drug rows.
findArray = dados_droga['composto'].unique()
exist = 'cacb2b860' in findArray
exist
"""#### - Posso concluir que o controle so foi realizado no composto 'cacb2b860' ##Separando compostos """
overwrite=False) pipeline = make_pipeline(CSP(n_components=8), LDA()) results = evaluation.process({"csp+lda": pipeline}) # To export the results in CSV within a directory: if not os.path.exists("./results"): os.mkdir("./results") results.to_csv("./results/results_part2-2.csv") # To load previously obtained results saved in CSV results = pd.read_csv("./results/results_part2-2.csv") ############################################################################## # Plotting Results # ---------------- # # We plot the results using the seaborn library. Note how easy it # is to plot the results from the three datasets with just one line. results["subj"] = [str(resi).zfill(2) for resi in results["subject"]] g = sns.catplot( kind="bar", x="score", y="subj", col="dataset", data=results, orient="h", palette="viridis", ) plt.show()
# plt.show() # Draw a boxplot with nested grouping by two categorical variables: ax = sns.boxplot(x="day", y="total_bill", hue="smoker", data=tips, palette="Set3") plt.savefig("boxplot with nested grouping by two categorical variables.jpg") # plt.show() # Draw a boxplot with nested grouping when some bins are empty: ax = sns.boxplot(x="day", y="total_bill", hue="time", data=tips, linewidth=2.5) plt.savefig("boxplot with nested grouping when some bins are empty.jpg") # plt.show() # Control box order by passing an explicit order: ax = sns.boxplot(x="time", y="tip", data=tips, order=["Dinner", "Lunch"]) plt.show() # Use catplot() to combine a pointplot() and a FacetGrid. This allows grouping within additional categorical variables. # Using catplot() is safer than using FacetGrid directly, as it ensures synchronization of variable order across facets: g = sns.catplot(x="sex", y="total_bill", hue="smoker", col="time", data=tips, kind="box", height=4, aspect=.7) plt.show()
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ranksums

''' Analise 5 - Hora do dia '''

# Forward slashes work on every platform; the previous 'data\\...' path only
# resolved on Windows.
gorjetas = pd.read_csv('data/tips_tratados_4.csv', sep=',')
print(gorjetas.head())
print(gorjetas.hora_do_dia.unique())

# Default categorical plot of bill value by time of day.
sns.catplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Swarm plot: spreads nearby points apart so they do not overlap.
sns.catplot(x='hora_do_dia', y='valor_da_conta', kind='swarm', data=gorjetas)
plt.show()

# Violin plot: the widest part marks where the values are most concentrated.
sns.violinplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Box plot: a visual form of the statistics reported by describe().
sns.boxplot(x='hora_do_dia', y='valor_da_conta', data=gorjetas)
plt.show()

# Histogram: the x axis holds the value of the variable and the other axis
# its frequency.
# Histogram of the lunch bills.
almoço = gorjetas.query("hora_do_dia == 'Almoço'").valor_da_conta
# NOTE(review): sns.distplot is deprecated in recent seaborn releases; kept
# so the rendered plot is unchanged.
sns.distplot(almoço)
plt.show()
axis=1, keys=['Total', 'Percent']) print(missing_data_1) #Code ends here # -------------- #Code starts here #Setting the figure size plt.figure(figsize=(10, 10)) #Plotting boxplot between Rating and Category cat = sns.catplot(x="Category", y="Rating", data=data, kind="box", height=10) #Rotating the xlabel rotation cat.set_xticklabels(rotation=90) #Setting the title of the plot plt.title('Rating vs Category [BoxPlot]', size=20) #Code ends here # -------------- #Importing header files from sklearn.preprocessing import MinMaxScaler, LabelEncoder #Importing header files from sklearn.preprocessing import MinMaxScaler, LabelEncoder
ax = sns.kdeplot(data=train['Age'], shade=True, gridsize=30)
_ = ax.set(title='Age distribution', ylabel='Distribution', xlabel='Age - months')

# %% [markdown]
# ## We've got similar number of cats and dogs. Most of them are around 1 year old with slightly higher number of female pets.

# %% [code] {"_kg_hide-input":true}
num = 10
mixed_breed_class = 307

# No plt.figure() here: catplot is figure-level and creates its own figure,
# so a preceding plt.figure() only left an empty 20x20 figure behind.
for pet_type in ('Dog', 'Cat'):
    # Take labels AND counts from the same filtered value_counts(); the counts
    # previously came from the unfiltered frame and so did not belong to the
    # breeds shown on the axis.
    breed_counts = train['Breed1'][(train['Type'] == pet_type)].value_counts()[:num]
    indexes, values = breed_counts.index, breed_counts
    names = [id_to_breed(i) for i in indexes]
    # catplot expects tabular data with one row per bar, hence a DataFrame
    # rather than a Series holding two arrays.
    s = pd.DataFrame({'values': values.values, 'names': names})
    ax = sns.catplot(x='values', y='names', kind='bar', data=s)
    _ = ax.set(title=f'{pet_type} breed classes top {num}',
               ylabel=f'{pet_type} breed', xlabel='Count')

# 1 for pets whose primary breed name is not one of the generic labels below.
# NOTE(review): 'Domestic Short Hard' looks like a typo for
# 'Domestic Short Hair'; left untouched because the counts quoted in the
# markdown below were presumably produced with it -- confirm.
pure_breeded = train['Breed1'].apply(lambda x: 0 if id_to_breed(x) in ['Mixed Breed', 'Domestic Short Hard', 'Domestic Medium Hair', 'Domestic Long Hair'] else 1)
print(f'Pure breeded pets: {sum(pure_breeded)}\nNot pure breeded pets: {len(pure_breeded)-sum(pure_breeded)}')

# %% [markdown]
# ### As we can see dogs as well as cats breeds are mostly dominated by ~3 classes. We've got 7512 purebreeded pets and 7481 pets that aren't purebreeded.

# %% [code] {"_kg_hide-input":true}
# Histogram plots of the windspeed and count columns, one figure each.
for hist_column in ('windspeed', 'count'):
    plt.figure(figsize=(7, 7))
    plt.hist(train[hist_column], bins=10)
    plt.xlabel(hist_column)
    plt.ylabel('Frequency')

################################################## Bivariate Plots #################################################################################################
# One categorical plot of "count" per categorical column.
for i in cat_cnames:
    sns.catplot(x=i, y="count", data=train)
    # NOTE(review): fname is built but nothing is saved with it here.
    fname = str(i) + '.pdf'

################################################## Density Plots ##################################################################################################
# Shaded kernel-density estimate of every column, in the original call order.
density_columns = (
    'season', 'month', 'holiday', 'weekday', 'workingday', 'weather',
    'temperature', 'atemp', 'humidity', 'windspeed', 'count',
)
for density_column in density_columns:
    sns.kdeplot(train[density_column], shade=True)
ax[i, -1].set_xlabel('RT (s)') ax[i, 0].set_ylabel('Reward manipulation = {:.2f}'.format(rewardfactor)) exp1 = pd.read_csv('../data/exp1.csv') exp2 = pd.read_csv('../data/exp2.csv') exp1 = exp1[(exp1['dyn'] == 'Dynamic') & (exp1['sub'] != 666) & (exp1['setsize'] == 12)] exp2 = exp2[(exp2['dyn'] == 'Dynamic') & (exp2['sub'] != 666) & (exp2['setsize'] == 12)] exp1['reward'] = 'None' newdf = exp1.append(exp1, sort=False) g = sns.catplot(x='reward', y='rt', hue='target', data=newdf, kind='point', order=['Absent', 'None', 'Present']) g.set_xticklabels(size=18) g.set_yticklabels(size=18) g.set_ylabels('RT (s)', size=20) g.set_xlabels('Reward Condition', size=20) ax = plt.gca() ax.plot([0, 1, 2], sim_mrts[::-1, 1, 1, 1], label='Sim Present', c='lightgreen', lw=2) ax.plot([0, 1, 2], sim_mrts[::-1, 1, 0, 0],
# Labels and title for the scatter plot drawn just before this section.
plt.style.use('ggplot')
plt.xlabel("Victory Status")
plt.ylabel("Winner")
plt.title("Scatter Plot")
plt.show()

#relplot
sns.set(style="darkgrid")
sns.relplot(x="opening_ply", y="statusEnc", hue="winner", style="rated",
            ci=None, dashes=False, markers=True, kind="line", data=games)
plt.ylabel("Victory Status")
plt.show()

# Per-outcome subsets of the games.
black = games.query("winner == 'black'")
white = games.query("winner == 'white'")
draw = games.query("winner == 'draw'")

#jointplot
sns.set(style="white")
# Data variables are passed by keyword: current seaborn releases reject
# positional x/y arguments, while keywords work on old and new versions.
sns.jointplot(x="white_rating", y="black_rating", data=games,
              kind="kde", height=7, space=0)
plt.show()

#cat plot
# Bar of 'turns' per victory status, split by winner.
pl = sns.catplot(x='victory_status', y='turns', hue='winner', data=games,
                 height=6, kind="bar", palette="muted")
pl.despine(left=True)
plt.show()

#Count plot
sns.countplot(x='winner', data=games)
plt.show()
hue='year', x='race', ) if SHOW: show() else: savefig('./race_year_count.png', ) del count_figure cat_figure = figure() cat = catplot( col='gender', data=input_df[[ 'gender', 'race', 'year', ]], hue='year', kind='count', x='race', ) if SHOW: show() else: savefig('./race_year_catplot.png', ) del cat_figure count_df = DataFrame([(key[0], key[1], key[2], value) for key, value in dict( Counter([ tuple(item) for item in input_df[[
#Convert csv file to dataframe
df = pd.read_csv(filepath, encoding='ISO-8859-1')

#Separate only heart rate observations
# na=False keeps rows with a missing 'type' from poisoning the boolean mask,
# and .copy() gives an independent frame so the column assignments below do
# not act on a slice of df (SettingWithCopyWarning).
df_heartrate = df[df['type'].str.contains('HeartRate', na=False)].copy()

#Convert creation date column to datetime format, then drop the timezone
df_heartrate['creation_date'] = pd.to_datetime(df_heartrate['creation_date'],
                                               format='%Y-%m-%d %H:%M:%S',
                                               utc=True)
df_heartrate['creation_date'] = df_heartrate['creation_date'].dt.tz_convert(None)

#Find integer value of weekday
df_heartrate['weekday'] = df_heartrate['creation_date'].dt.dayofweek
dayofweek = df_heartrate['weekday'].to_numpy()

#Convert integer value of weekday to string representation
# dt.day_name() yields English names by default, which is what the `order`
# list below expects; calendar.day_name follows the process locale.
dayofweek_string = df_heartrate['creation_date'].dt.day_name().tolist()
df_heartrate['weekday'] = dayofweek_string

#Set index of dataframe as creation date
df_heartrate.set_index('creation_date', inplace=True, drop=True)

#Create and show violin plot
sns.catplot(x='weekday', y='value', data=df_heartrate, kind='violin',
            order=['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday',
                   'Friday', 'Saturday'],
            inner='quartile')
plt.show()
estimator=sum, palette=sns.color_palette("muted", n_colors=len(despesas_sem_vencimentos) + 4), ) plot.set_xticklabels(plot.get_xticklabels(), rotation=75, horizontalalignment="right") plt.show() # In[48]: sns.catplot( x="value", y="Descricao", col="month", data=despesas_com_classificacao, kind="bar", height=4, aspect=0.7, ) # In[49]: despesas_com_classificacao[[ "legal_status", "subgroup", "Descricao", "summary", "Codigo" ]].head(100) # In[50]: # Auxílio-Alimentação # Vale refeição/vale alimentação
def main(args):
    """Compare toysff and amazon prices, export the results and optionally plot.

    Builds a long-form table with one row per article and source, pickles it,
    prints per-category price differences and, on request, writes CSV files
    and shows bar charts of the average price per subbrand and price segment.

    Args:
        args: Parsed options. ``args.csv`` enables the CSV exports and
            ``args.plot`` enables the bar charts.
    """
    df, info = basic_compare('./data/toysff.p', './data/amazon.p', 'toysff',
                             'amazon')
    output_as_csv(df, './output/price_differences.csv')

    # prepare data for bar plot: one frame per source with a common 'price'
    # column. `axis` is passed by keyword; the positional form was removed in
    # pandas 2.0.
    df = df.rename(columns={'toysff_name': 'name'})
    df_amazon = df.drop(['amazon_name', 'toysff_price'],
                        axis=1).rename(columns={'amazon_price': 'price'})
    df_toysff = df.drop(['amazon_name', 'amazon_price'],
                        axis=1).rename(columns={'toysff_price': 'price'})
    # Scalar assignment broadcasts to every row; the previous list sized by
    # count() would mismatch if 'article_nr' contained missing values.
    df_amazon['src'] = 'amazon'
    df_toysff['src'] = 'toys for fun'
    result = pd.concat([df_amazon, df_toysff])
    print(result)

    # Context manager so the pickle file is flushed and closed deterministically.
    with open('./data/comparison_amazon_toysff', 'wb') as pickle_file:
        pickle.dump(result, pickle_file)
    if args.csv:
        output_as_csv(result, './output/comparison_amazon_toysff.csv')

    # data for subbrand plot
    subbrand_data = result[['subbrand', 'src', 'price']]
    subbrand_aggregated = subbrand_data.groupby(
        ['subbrand', 'src'])['price'].agg(['sum', 'count', 'mean'])
    subbrand_aggregated = subbrand_aggregated.reset_index()

    # data for price segment plot
    segment_data = result[['price_segment', 'src', 'price']]
    segment_aggregated = segment_data.groupby(
        ['price_segment', 'src'])['price'].agg(['sum', 'count', 'mean'])
    segment_aggregated = segment_aggregated.reset_index()

    # for the correct count- and sum-aggregation we need to drop duplicates
    # all values below are the same for 'src=amazon' and 'src=toysff'
    result_single = result[[
        'article_nr', 'subbrand', 'price_segment', 'diff_abs', 'diff_%'
    ]].drop_duplicates()

    # Output on console and to csv if set true
    for category in ('subbrand', 'price_segment'):
        for diff_column in ('diff_abs', 'diff_%'):
            output_diff_by_category(result_single, category, diff_column,
                                    args.csv)

    if args.plot:
        # visualisation subbrands
        sns.set(style="whitegrid")
        sns.catplot(y='subbrand', x='mean', orient='h', height=8, hue='src',
                    kind='bar', data=subbrand_aggregated, aspect=.8)
        plt.title('Subbrands: Average Prices in €')
        plt.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)

        # visualisation price_segments
        sns.set(style="whitegrid")
        sns.catplot(y='price_segment', x='mean', orient='h', height=8,
                    hue='src', kind='bar', data=segment_aggregated, aspect=.8)
        plt.title('Price Segments: Average Prices €')
        plt.tight_layout(pad=1.08, h_pad=None, w_pad=None, rect=None)
        plt.show()

    print('Main Statistics: ')
    print(info)
#plt.leyend('asndlsad') plt.plot() plt.barh(resumen(cierre, '19').reset_index()['X ENS']) ############# sns.set(style="whitegrid") # Load the example Titanic dataset titanic = sns.load_dataset(resumen(cierre, '19')) # Draw a nested barplot to show survival for class and sex g = sns.catplot(x="PO", y=["X ENS", 'X PINTAR'], hue="CUSTOMER", data=resumen(cierre, '19').reset_index(), height=6, kind="bar", palette="muted") g.despine(left=True) g.set_ylabels("survival probability") titanic = sns.load_dataset("titanic") sns.catplot(y="CUSTOMER", hue="CUSTOMER", kind="count", palette="pastel", edgecolor=".6", data=carga)
def gender_bar_graph():
    """Show a count plot of the ``sex`` column of the module-level ``data``."""
    count_options = {
        "x": "sex",
        "kind": "count",
        "palette": "magma",
        "height": 6,
    }
    sns.catplot(data=data, **count_options)
    plt.title("Gender of students : F - female,M - male")
    plt.show()
# stats
pValsDF, bhDF = statsFeat(FeatMatGrouped, 'normal', 0.1, control)
bhDF = bhDF.reset_index()

# Split the 'metadata' entries into separate columns, then drop the original.
bhDF['drug'] = [list(i) for i in bhDF.metadata.str if i.dtype == object][0]
bhDF['date'] = bhDF['metadata'].str[0]
bhDF['worm_number'] = bhDF['metadata'].str[1]
bhDF['window'] = bhDF['metadata'].str[-1]
bhDF = bhDF.drop(columns='metadata')

# plot the total number of sig feats by worm number and drug (with window as
# hue) for each date, saving one image per date
for date in metadata_dict['date']:
    sns.catplot(
        data=bhDF[bhDF['date'] == date],
        x='worm_number',
        y='sumSig',
        hue='window',
        col='drug',
        kind='bar',
        palette='colorblind',
    )
    figure_path = os.path.join(
        save_dir, 'T_test_number_sig_feats5mins_{}.tif'.format(date))
    plt.savefig(figure_path, bbox_inches='tight', pad_inches=0.03)

# and for all data combined
sns.catplot(
    data=bhDF,
    x='worm_number',
    y='sumSig',
    hue='window',
    col='drug',
    kind='bar',
    palette='colorblind',
    ci='sd',
)
df2.groupby(['biological_sample_group', 'sex'])[['data_point']].mean() #how to pass this along to the plot functions? #seems like a whisker plot can be generated directly from the group object #scatter plot of values in the 4 groups - put those into 4 variables???? # need color or shape to show M vs F and KO vs WT import matplotlib.pyplot as plt x = df2['date_of_experiment'] y = df2['data_point'] plt.scatter(x,y) plt.show() #how to do a scatterplot and color and shape the 4 groups ?? #here is column plot of KO vs WT grouped by sex import seaborn as sns x = df2['date_of_experiment'] y = df2['data_point'] sns.catplot(x = 'sex', y = 'data_point', col = 'biological_sample_group', kind = 'bar', data=df2) plt.show() # does not have mut vs wt # any reason to show weight? #sns.violinplot(x = 'sex', y = 'weight', col = 'biological_sample_group', kind = 'violin', data=df2) #plt.show()
df_sub['p_corrected'] = ps_corrected[1] temp.append(df_sub) anova_results = pd.concat(temp) anova_results['stars'] = anova_results['p_corrected'].apply(utils.stars) anova_results = anova_results.sort_values(['roi', 'condition', 'model']) anova_results.to_csv('../../../../results/{}/RP/{}/one way ANOVA.csv'.format( experiment, 'encoding 15 stats'), index=False) g = sns.catplot( x='roi_name', y='mean_variance', hue='model_name', hue_order=[ 'VGG19', 'DenseNet1211', 'MobileNetV2', 'Fast Text', 'GloVe', 'Word2Vec' ], row='condition', data=df, kind='bar', aspect=6, sharey=False, ) g._legend.set_title('Encoding Models') (g.set_axis_labels( "ROIs", "Mean Variance Explained").set_titles("{row_name}").set(ylim=(0, 0.06))) g.axes[0][0].set(title='Shallow Process') g.axes[1][0].set(title='Deep Process') k = {'I2V': -0.25, 'W2V': 0.175} j = 0.15 l = 0.0005
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's default theme and load the bundled "tips" example data.
sns.set()
df = sns.load_dataset("tips")

# Bar of total_bill by sex, one facet per day, wrapped two facets per row.
sns.catplot(
    data=df,
    kind="bar",
    x="sex",
    y="total_bill",
    col="day",
    col_wrap=2,
)
plt.show()
plt.figure(figsize=(5, 5))
sns.boxplot(x='Type', y='Rating', data=df)
# tight_layout only has something to adjust once the axes have been drawn.
plt.tight_layout()

# <b><i style="font-size:14pt;"><u>Installs</u> by <u>Price</u> :</i></b>

# In[277]:

# Paid vs free and the number of installs
installs_greater_1000 = df[df["Installs"] > 1000]
installs_greater_1000 = installs_greater_1000.sort_values(['Price'])

# In[278]:

# catplot is figure-level and creates its own figure, so a preceding
# plt.figure(figsize=(20, 20)) only produced an extra blank figure. The
# intended 20x20 size is requested through height/aspect instead.
sns.catplot(x="Installs", y="Price", data=installs_greater_1000,
            height=20, aspect=1)
plt.xticks(rotation=90)
plt.show()

# <b><i style="font-size:14pt;"><u>Category</u> by <u>Size</u> :</i></b>

# In[279]:

plt.figure(figsize=(5, 15))
sns.barplot(x='Size', y='Category', data=df)

# > <b><i style="font-size:14pt;">Analysis of the <u>Installs</u> variable: this part focuses on Installs in relation to the other variables</i></b>
#
# <b><i style="font-size:14pt;">Grouping the install counts into 4 groups: A, B, C, Highest.</i></b>
'awareness'] + ', ' + df_plot['confidence'] #temp = [] #for (target,subject),df_sub in df_plot.groupby(['success','sub']): # df_sub['prob'] = df_sub['count'] / df_sub['count'].sum() # temp.append(df_sub) #df_plot = pd.concat(temp) df_plot.to_csv(os.path.join(saving_dir, 'pos_for_plot.csv')) df_plot = df_plot.sort_values(['awareness']) g = sns.catplot( x='awareness', y='prob', hue='confidence', col='correctness', row='success', row_order=['high pos', 'low pos'], data=df_plot, kind='bar', aspect=2, ) (g.set_axis_labels('Awareness', 'Probability').set_titles("{row_name} | {col_name}").set( ylim=(0., 0.85)).despine(left=True)) for ii, (target, df_sub) in enumerate(df_plot.groupby('success')): # formula = 'prob ~ C(correctness)*C(awareness)*C(confidence)' # model = ols(formula, df_sub).fit() # aov_table = anova_lm(model, typ=2) # s = f"{target}, F({model.df_model: .0f},{model.df_resid: .0f}) = {model.fvalue: .3f}, p = {model.f_pvalue: .4f}" # print(s) # g.axes[ii][0].annotate(s,xy=(-0.45,.8))
ci=None, ) plt.xlabel("Season") plt.ylabel("Hourly number of bikes rented") plt.title("Number of bikes rented per hour by weather condition and season") plt.xticks(ticks=(0, 1, 2, 3, 3.5)) plt.grid(which="major", axis="y") # In[15]: sns.catplot( x="mnth", y="cnt", kind="point", hue="workingday", data=hour.compute(), ci=None, palette="Set1", aspect=2.3, legend=False, ) plt.legend(("Weekend", "Workday"), loc="upper right", bbox_to_anchor=(1.2, 0.5)) plt.xlabel("Month") plt.ylabel("Hourly number of bikes rented") plt.title("Number of bikes rented per hour by type of day") plt.axhline(hour.cnt.mean().compute(), ls="--", color="#a5a5a5") plt.text(0.5, hour.cnt.mean().compute() - 10, "Average", color="#a5a5a5") # In[16]:
# Store the parsed duration parts as columns and drop the raw "Duration" text.
train_data["Duration_hours"] = duration_hours
train_data["Duration_mins"] = duration_mins
train_data.drop(["Duration"], axis = 1, inplace = True)
train_data.head()
"""---"""
# Number of rows per airline.
train_data["Airline"].value_counts()
# Observations recorded from the plot below: Jet Airways Business has the
# highest prices; apart from it, the airlines have similar medians.
# Airline vs Price (boxen plot, data sorted by descending price)
sns.catplot(y = "Price", x = "Airline", data = train_data.sort_values("Price", ascending = False), kind="boxen", height = 6, aspect = 3)
plt.show()
# Airline is nominal categorical data, so it is one-hot encoded;
# drop_first=True omits the first category's dummy column.
Airline = train_data[["Airline"]]
Airline = pd.get_dummies(Airline, drop_first= True)
Airline.head()
# Number of rows per source.
train_data["Source"].value_counts()
# Source vs Price (boxen plot, data sorted by descending price)
sns.catplot(y = "Price", x = "Source", data = train_data.sort_values("Price", ascending = False), kind="boxen", height = 4, aspect = 3)
""" Plotting a three-way ANOVA ========================== _thumb: .42, .5 """
import seaborn as sns

sns.set(style="whitegrid")

# Bundled example data: pulse measurements by time, exercise kind and diet.
df = sns.load_dataset("exercise")

# Point plot of pulse over time, one line per kind and one facet per diet.
g = sns.catplot(
    data=df,
    kind="point",
    x="time",
    y="pulse",
    hue="kind",
    col="diet",
    capsize=.2,
    palette="YlGnBu_d",
    height=6,
    aspect=.75,
)
g.despine(left=True)
# 지역별로 subplot 그리기 # col = 어떤 기준으로 subplot 그릴건지? sns.relplot(data=df_last, x="연도", y="평당분양가격", hue="지역명", kind="line", col="지역명", col_wrap=4, ci=None) # 연도별 평당 가격을 지역별로 subplot bar chart로 표현 sns.catplot(data=df_last, x="연도", y="평당분양가격", kind="bar", col="지역명", col_wrap=4) # box plot sns.boxplot(data=df_last, x="연도", y="평당분양가격") # hue 사용해서 전용면적별로 plt.figure(figsize=(12, 3)) sns.boxplot(data=df_last, x="연도", y="평당분양가격", hue="전용면적") # violin plot (box plot에 밀도추정 값을 같이 볼수 있다) sns.violinplot(data=df_last, x="연도", y="평당분양가격") # ### lmplot & swarmplot # 연도별 평당분양가격을 lmplot으로
# One can find many ways to handle categorical data. Some of them categorical data are, # 1. <span style="color: blue;">**Nominal data**</span> --> data are not in any order --> <span style="color: green;">**OneHotEncoder**</span> is used in this case # 2. <span style="color: blue;">**Ordinal data**</span> --> data are in order --> <span style="color: green;">**LabelEncoder**</span> is used in this case # In[24]: df_train["Airline"].value_counts() # In[25]: # Airline vs Price sns.catplot(x = "Airline", y = "Price", data = df_train.sort_values("Price", ascending = False), kind = "boxen", height = 6, aspect = 2) plt.show() # In[26]: # From the above graph it is clear that jet airways has the maximun price # And also apart from the jet airways almost all the other airlines has the same median # In[27]: # As Airline is a Nominal Category we will perform one hot encoding
from sklearn.preprocessing import MinMaxScaler, LabelEncoder

#Loading the data
data = pd.read_csv(path)
data['Rating'].plot(kind='hist')
plt.show()

#Code starts here
# Keep ratings up to 5 only. .copy() makes `data` an independent frame so the
# in-place dropna and the column assignment below do not operate on a slice
# of the original frame (SettingWithCopyWarning).
data = data[data['Rating'] <= 5].copy()
data['Rating'].plot(kind='hist')
plt.show()

# Missing-value count per column, taken before the rows are dropped.
nulls = data.isnull().sum()
data.dropna(inplace=True)

# catplot is figure-level and creates its own figure (sized via height), so a
# preceding plt.figure(figsize=(10, 10)) only produced an extra blank figure.
cat = sns.catplot(x='Category', y='Rating', kind='box', data=data, height=8)
cat.set_xticklabels(rotation=90)
plt.title("Rating vs Category boxplot", size=20)

# Keep only the first genre of each ';'-separated list.
data['Genres'] = data['Genres'].str.split(";", expand=True)[0]

# Mean, maximum and minimum rating per genre, collected in one frame.
mean_rating = data.groupby('Genres')['Rating'].mean()
max_rating = data.groupby('Genres')['Rating'].max()
min_rating = data.groupby('Genres')['Rating'].min()
rating_data = {
    'mean_rating': mean_rating,
    'max_rating': max_rating,
    'min_rating': min_rating
}
rating_data_df = pd.DataFrame(
    rating_data, columns=['mean_rating', 'max_rating', 'min_rating'])
# -*- coding: utf-8 -*-
""" Created on Sun Jan 13 14:06:15 2019 @author: Eldrich """
import os
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt

# NOTE(review): hard-coded, machine-specific working directory.
os.chdir('E:\\University\\data\\Class Work\\Thesis - KTH\\Extra Work\\Papers\\Journal Paper\\V2')
df = pd.read_csv('Delay_Table.csv')
sns.set(style='whitegrid')
#sns.set(rc={'figure.figsize':(11.7,8.27)})
#g=sns.barplot(x=df['Run number'], y=df.Delay, hue=df.Pulse, data=df, color='gray')
#g.despine(left=True)

# x/y/hue are passed by keyword and the facet size via `height`: current
# seaborn releases reject positional data arguments and no longer accept the
# old `size` keyword.
g = sns.catplot(x='Run number', y='Delay', hue='Pulse', data=df, kind='bar',
                color='gray', legend=False, aspect=2, height=3)
g.set_ylabels('Delay (s)')
g.set_xlabels(label='')
# Fix the y range and pin the x range to the bounds the bars already occupy.
g.set(ylim=(0, 0.5), xlim=g.ax.axes.get_xbound())
sns.despine()

# Dashed horizontal reference line, labelled 'Average' by the annotation.
plt.plot([-0.54, 2.5400000000000005],
         [0.3435263157894737, 0.3435263157894737],
         linewidth=1, linestyle='--', color='k')
#sns.set_context("paper")
plt.annotate('Average', xy=(1, 0), xytext=(2, 0.35))
plt.tight_layout()
# Graphing the differences of infrastructure reports:
# merge the two datasets and assign column to distinguish them
infra_vergleich = pd.concat([
    vor_profilsumme_infra.assign(Monat='Vormonat'),
    neu_profilsumme_infra.assign(Monat='aktueller Monat')
])
infra_vergleich.reset_index(inplace=True)

# catplot is figure-level and builds its own figure, so no plt.subplots()
# call is made first; the previous one left an unused, empty 15x15 figure.
ax = sns.catplot(x="Profil",
                 y="Preis 1 (€)",
                 hue="Monat",
                 data=infra_vergleich,
                 height=8,
                 kind="bar",
                 palette="muted",
                 legend=False)
# Actually set the title: the former `ax.set_title = '...'` only overwrote an
# attribute on the FacetGrid and never drew anything.
ax.set(title='Infrastructure Vergleich zu Vormonat')
ax.set_ylabels("Umsatz in EURO")
ax.set_xlabels("Profile")
plt.legend(loc="upper left")

# Lay out and save the figure that holds the bars.
fig = ax.fig
fig.tight_layout()
fig.savefig("diff_infra.png")
plt.clf()

# Graphing the differences of extern reports: