def plot(df: pd.DataFrame, id_vars: list, value_vars: list):
    """
    Melt a data frame to long form and draw one line per value column.

    :param df: dataframe to plot
    :type df: pd.DataFrame
    :param id_vars: identifier column name(s); the first one is used for the
        x-axis. A single non-list value is wrapped in a list.
    :type id_vars: list or str
    :param value_vars: list of column names plotted on the y-axis
    :type value_vars: list
    :return: melted data frame
    :rtype: pd.DataFrame
    """
    id_columns = id_vars if isinstance(id_vars, list) else [id_vars]
    melted = pd.melt(df, id_vars=id_columns, value_vars=value_vars)
    # The melted frame is read through its "variable" and "value" columns.
    sns.lineplot(data=melted, x=id_columns[0], y="value", hue="variable")
    return melted
def generate_plot(data: pd.DataFrame, output_path: str) -> None:
    """
    Draw a line plot of the given dataframe and save it as an image.

    :param data: The dataframe to generate the plot for
    :param output_path: The file output path
    """
    colour_by_key = {
        'L': '#875438',
        'C': '#0E0C0C',
        'U': '#8A8D91',
        'R': '#C1A15B',
        'M': '#EC7802',
    }
    axes = sns.lineplot(data=data, hue='A', linewidth=1.5,
                        palette=colour_by_key)
    axes.set(ylabel='Average Proportion of Deck')
    # Save through the figure that owns the axes seaborn returned.
    axes.figure.savefig(output_path)
# Number of rows sharing each 'datetime' value, broadcast back onto every row.
df['counts'] = df.groupby('datetime', as_index=False)['datetime'].transform(lambda s: s.count())

# Split each 'sentiment' entry into its first and second component
# (presumably polarity and subjectivity, going by the axis labels below).
x = [point[0] for point in df['sentiment']]
y = [point[1] for point in df['sentiment']]

plt.figure(figsize=(10, 10))
plt.scatter(x, y, alpha=0.5)
plt.xlabel("Polarity")
plt.ylabel("Subjectivity")
plt.title("Sentiment:Polarity vs. Subjectivity")
plt.show()

# Counts against the raw 'datetime' values.
plt.figure(figsize=(10, 10))
sns.lineplot(data=df, x="datetime", y="counts")
plt.xticks(rotation=15)
plt.xlabel('Time(Minutes)')
plt.ylabel('Count')
plt.title('Count of Tweet Times By Minute')
plt.show()

# Replace 'datetime' by its hour component, then plot the same counts again.
df['datetime'] = pd.to_datetime(df['datetime'], format='%m/%d/%y %H:%M').dt.hour
plt.figure(figsize=(10, 10))
sns.lineplot(data=df, x="datetime", y="counts")
plt.xticks(rotation=15)
plt.xlabel('Time(Hours)')
plt.ylabel('Count')
plt.title('Count of Tweet Times By Hour')
plt.show()
def nucleotide_difference_imbalance_plot_stylized_like_Figure_8_of_Morrill_et_al_2016(
    sequence_file, chr_, position_corresponding_to_first_nt,
    chunk_size = chunk_size, overlap_specified = overlap_specified,
    save_vg=False, return_fig=False):
    '''
    Main function of script.
    Plot nucleotide difference imbalance along a sequence, styled like
    Figure 8, panel B of Morrill et al 2016 (PMID: 27026700), see
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4882425/figure/F8/ .

    Inputs:
    - `sequence_file`: FASTA file spanning the desired coordinates. Only the
      first sequence in the file is used.
    - `chr_`: text designation of the chromosome/contig/scaffold/region,
      appended to the x-axis label.
    - `position_corresponding_to_first_nt`: integer position of the first base
      of the provided sequence; added to every window midpoint so the x-axis
      shows the supplied coordinates.

    Optional:
    - `chunk_size`: window size. Windows shorter than this are discarded.
    - `overlap_specified`: accepted for the analysis windows.
      NOTE(review): this argument is not read in this function; the step
      between windows comes from the module-level `step_size` -- confirm that
      is intended.
    - `save_vg`: save as SVG instead of the default image file.
    - `return_fig`: return the plot axes object instead of saving; meant for
      Jupyter / IPython, where `ax.figure` redisplays the plot.

    Saves an image file of the plot, or optionally, returns a plot object.
    '''
    import sys

    # Retrieve sequence and break it into full-size windows
    #---------------------------------------------------------------------------
    seq = Fasta(sequence_file)[0]  # assume first one is the one to be used
    chunks = [
        piece
        for piece in gen_chunk_string_with_different_step(
            seq, chunk_size, step_size)
        if len(piece) == chunk_size  # drop short windows at the end
    ]

    # One pass over the windows: midpoint position plus G-vs-C and A-vs-T
    # differences. Lists (not dicts) keep the order under Python 2.7 as well.
    #---------------------------------------------------------------------------
    start_pos = position_corresponding_to_first_nt
    chunks_midpoints = []
    chunks_diffs = []
    for indx, chunk in enumerate(chunks):
        if len(chunk) != chunk_size:
            # shouldn't happen because of the filtering above
            sys.stderr.write("\n\nError? Issue with size of sequence chunks "
                             "not matching expected/\n")
            sys.exit(1)
        if indx == 0:
            # first window needs no offset from a previous one
            relative_midpoint = chunk_size/2
        else:
            window_start = step_size * indx
            window_end = step_size * indx + chunk_size
            relative_midpoint = midpoint((window_start, window_end))
        # shift by the position of the first base in the provided sequence
        chunks_midpoints.append(relative_midpoint + start_pos)
        # cast the 'Sequence' object to a string for the calculations
        chunk_text = str(chunk)
        chunks_diffs.append(
            (calc_nt_diff("GC", chunk_text), calc_nt_diff("AT", chunk_text)))

    # Make the plot
    #---------------------------------------------------------------------------
    sns.set()
    plt.figure(figsize=default_plt_image_size)
    # Line labels follow the Morrill et al. figure.
    df = pd.DataFrame(chunks_diffs, chunks_midpoints, ["G-C", "A-T"])
    ax = sns.lineplot(data=df)
    ax.set_ylabel(yaxis_label, fontsize = 16)
    ax.set_xlabel(xaxis_label_prefix+chr_, fontsize = 16)
    ax.legend(fontsize= 12)

    # Return the plot object (Jupyter / IPython) or save to file (default,
    # including when called from the command line).
    #---------------------------------------------------------------------------
    if return_fig:
        sys.stderr.write("Plot figure object returned.")
        return ax

    output_file_name = generate_output_file_name(
        sequence_file, suffix_for_saving_plot)
    if save_vg:
        # Vector graphics; swaps the last four characters of the name for
        # ".svg".
        svg_file_name = output_file_name[:-4]+".svg"
        plt.savefig(svg_file_name, orientation='landscape')
        sys.stderr.write("\nPlot image saved to: {}\n".format(svg_file_name))
    else:
        plt.savefig(output_file_name)
        sys.stderr.write("\nPlot image saved to: {}\n".format(
            output_file_name))
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

if __name__ == '__main__':
    # Demo: signal against timepoint for seaborn's "fmri" example dataset.
    fmri = sns.load_dataset("fmri")
    ax = sns.lineplot(data=fmri, x="timepoint", y="signal")
    plt.show()
print("median error rate/subs/ins/del original:", median_error)

# Median error rate per coverage value for each `type`, with a
# standard-deviation band, on a logarithmic x-axis.
ax = sns.lineplot(data=indata, x="transcript_cov", y="err_rate", hue="type",
                  estimator='median', ci='sd')
ax.set_xscale('log')
ax.set_ylim(0, 12)
# Ticks at 1..9 and then every 10 from 10 to 100; set after the scale change.
ax.set_xticks(list(range(1, 10)) + list(range(10, 101, 10)))
ax.set_xlabel("Reads per transcript")
ax.set_ylabel("Error rate (%)")
def lineplot_per_interval(df, y, hue, y_title="", start_year=DEFAULT_MIN_YEAR,
                          save_to_file=True, filetype="pdf",
                          legend_loc="upper right", hue_order=None,
                          legend_title="Snapshot rank", estimator=np.median):
    """Plot `y` per interval as a line plot, optionally split by `hue`.

    :param df: data frame with a `year_season` column, the interval column
        named by `INTERVAL_COL_NAME`, and the `y` (and `hue`) columns
    :param y: name of the column plotted on the y-axis
    :param hue: name of the column used for both colour and line style;
        a falsy value draws a single line
    :param y_title: y-axis label; defaults to `hue.title()` when `hue` is set
    :param start_year: rows whose `year_season` is below `str(start_year)`
        are dropped
    :param save_to_file: pass the plot to `save_figure` when true
    :param filetype: extension used in the generated file name
    :param legend_loc: legend location handed to `set_legend`
    :param hue_order: order of the hue levels
    :param legend_title: legend title handed to `set_legend`
    :param estimator: aggregation function handed to seaborn
    :return: the object returned by `sns.lineplot`
    """
    _, ax = plt.subplots()
    if not y_title:
        y_title = hue.title() if hue else ""

    # filter by start year
    df = df[df.year_season >= str(start_year)]
    set_plot_params()

    if hue:
        try:
            plot_ax = sns.lineplot(data=df, y=y, x=INTERVAL_COL_NAME, hue=hue,
                                   style=hue, hue_order=hue_order,
                                   estimator=estimator, ci=95)
        except Exception:
            # Fall back to explicit numeric markers without dashes. Catching
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            markers = list(range(1, len(hue_order) + 1))
            print(markers)
            plot_ax = sns.lineplot(data=df, y=y, x=INTERVAL_COL_NAME,
                                   style=hue, hue=hue, dashes=False,
                                   markers=markers, hue_order=hue_order)
        set_legend(ax, legend_title, legend_loc)
        legend = plot_ax.get_legend()
        plt.setp(legend.get_texts(), fontsize='10')  # legend entries
        plt.setp(legend.get_title(), fontsize='10')  # legend title
    else:
        plot_ax = sns.lineplot(data=df, y=y, x=INTERVAL_COL_NAME,
                               estimator=estimator)

    set_x_ticks(df)
    ax.set_xlabel('Interval')
    ax.set_ylabel(y_title, fontsize=10)

    # File name: lower-cased title words joined by "_", with "%" spelled out
    # and parentheses removed.
    title_slug = "_".join(
        y_title.replace("%", "pct").replace("(", "").replace(
            ")", "").lower().split())
    fig_filename = "lineplot_%s.%s" % (title_slug, filetype)
    if save_to_file:
        save_figure(plot_ax, fig_filename)
    return plot_ax
has_latest_model = True predictions_latest, _ = dataset.predict(df, './models/latest', True) print('Mean error latest : ' + str(dataset.mean_error(np.array(predictions_latest), y))) success_rate_latest = dataset.get_trend_success_rate(predictions_latest, y, df) print('Trend prediction success for latest : ' + str(success_rate_latest) + '%') except: has_latest_model = False predictions_latest = None pass print('Mean error reference : ' + str(dataset.mean_error(np.array(predictions_reference), y))) success_rate_reference = dataset.get_trend_success_rate(predictions_reference, y, df) print ('Trend prediction success for reference : ' + str(success_rate_reference) + '%') figure(num=None, figsize=(24, 10), dpi=80, facecolor='w', edgecolor='k') ax = sns.lineplot(x=predictions_reference.index, y=y, label="Test Data", color='blue') ax = sns.lineplot(x=predictions_reference.index, y=predictions_reference[0], label="Prediction reference", color='gray') if has_latest_model: ax = sns.lineplot(x=predictions_reference.index, y=predictions_latest[0], label="Prediction latest", color='red') ax.set_title('Price', size = 14, fontweight='bold') ax.set_xlabel("Hours", size = 14) ax.set_ylabel("Cost", size = 14) ax.set_xticklabels('', size=10) plt.show()
homogeneity = {}

# Fit k-means for every candidate number of clusters and record, per k,
# the model inertia and the homogeneity score of its predictions.
for k in range(1, 5):
    km = KMeans(n_clusters=k)
    pred = km.fit_predict(x)
    inertia[k], homogeneity[k] = km.inertia_, homogeneity_score(y, pred)

# %%
# Inertia curve first.
ax = sns.lineplot(
    x=list(inertia),
    y=list(inertia.values()),
    label="inertia",
    color="blue",
    legend=None,
)
ax.set_ylabel("inertia")

# Add a twin y-axis, then draw the homogeneity curve.
ax.twinx()
ax = sns.lineplot(
    x=list(homogeneity),
    y=list(homogeneity.values()),
    label="homogeneity",
    color="red",
    legend=None,
)
ax.set_ylabel("homogeneity")

# One figure-level legend covering both curves.
ax.figure.legend()
daily_data.dtypes dfNW56Articles.reset_index(inplace=True, drop=True) #reset index #merge myArticles = pd.merge(dfNW56Articles, daily_data, on='date', sort=True) myArticles.describe() #95 articles au final. # Sauvegarde de myArticles en csv pourrait servir dans d'autres articles. myArticles.to_csv("myArticles.csv", sep=";", index=False) #séparateur ; ########################################################################## # Graphiques des événements et des publications des articles # sur toute la période ########################################################################## sns.set() #paramètres esthétiques ressemble à ggplot par défaut. fig, ax = plt.subplots() #un seul plot sns.lineplot(x='date', y='pageviews', data=daily_data, color='grey', alpha=0.2) sns.scatterplot(x='date', y='pageviews', data=myOutliers, color='red', alpha=0.5) sns.scatterplot(x='date', y='pageviews', data=myArticles, color='blue', marker="+") fig.suptitle(str(len(myOutliers)) + " événements (ronds rouges) pour " + str(len(myArticles)) + " articles (croix bleues) : ", fontsize=14, fontweight='bold') ax.set(
# Calendar features derived from the 'date' column of `train`.
for column, attribute in (('year', 'year'),
                          ('month', 'month'),
                          ('day', 'dayofyear'),
                          ('weekday', 'weekday')):
    train_df[column] = getattr(train['date'].dt, attribute)
train_df.head()

# --------------------------------------------------------------------------------------------------
# ---- Time series decomposition
# to begin with, the series has to be decomposed in order to study:
# -- the seasonality
# -- the trend
# -- the residuals
# since we have 5 years of data we expect a yearly or weekly seasonality

sns.lineplot(data=train_df, x="date", y="sales", legend='full')
# the plot above shows an increasing trend and a yearly seasonality;
# there appears to be a sales peak in the middle months

sns.lineplot(data=train_df[:28], x="date", y="sales", legend='full')
# the plot above does not clearly show a weekly seasonality

sns.boxplot(data=train_df, x="weekday", y="sales")
# this plot (above) shows that, on average, sales increase slightly
# over the course of the week
# Monday = 0 ---- Sunday = 6
# it also shows that sales are lower on weekdays than at the weekend

# index train_df by date and store sales as float
train_df = train_df.set_index('date')
train_df['sales'] = train_df['sales'].astype(float)
temp[i, j] ]) hv_df = pd.DataFrame(hv_progress_df, columns=[ 'Evaluations', 'Runs', 'Approaches', 'HV (Underlying)' ]) print(hv_df) color_map = plt.cm.get_cmap('viridis') color_map = color_map(np.linspace(0, 1, 5)) ax = sns.lineplot(x="Evaluations", y="HV (Underlying)", hue="Approaches", style="Approaches", markers=True, dashes=False, data=hv_df, palette=color_map) #box = ax.get_position() handles, labels = ax.get_legend_handles_labels() #ax.legend(handles=handles, labels=labels) ax.legend(handles=handles[1:], labels=labels[1:], frameon=False, loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=5) #plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.) #ax.set_title(prob + ', ' + str(obj) + ' objs, ' + sx)
# Box plots to find outliers of purchases, cash_advance and payments.
# The series is passed as `x=` explicitly: newer seaborn releases no longer
# accept data vectors positionally.
for column in ('purchases', 'cash_advance', 'payments'):
    sns.boxplot(x=cc_clean[column], color='#fdc029')
    plt.show()

# plotting balance and different purchase options
plt.figure(figsize=(10,4))
for column, label in (('purchases', "Purchases"),
                      ('oneoff_purchases', 'Oneoff'),
                      ('installments_purchases', 'Install')):
    # Explicit x=/y= keywords for the same reason as above.
    sns.lineplot(x=cc_clean['balance'], y=cc_clean[column], label=label)
plt.show()

sns.pairplot(cc_clean)

# Hierarchical clustering

# using standardscaler to reduce the distance of each variable
sc = StandardScaler()
xs = sc.fit_transform(cc_clean)
# Rebuild a data frame with the original index and column names.
X = pd.DataFrame(xs, index=cc_clean.index, columns=cc_clean.columns)

# hclust
# going to do euclidean, cosine and cityblock distance
def do_plot(df, thr, min_epc, hue='dset', title='', legend_hue=False):
    """Draw a two-panel per-epoch figure and return it.

    Top panel: 'err_prior' against 'epoch' as lines split by `hue`, with the
    rows whose 'convergence' is not empty overlaid as markers.
    Bottom panel: 'var_pseudo_neg' against 'epoch' drawn the same way, plus a
    horizontal dashed line at `thr` and a vertical dashed line at `min_epc`.

    :param df: data frame with 'epoch', 'err_prior', 'var_pseudo_neg',
        'convergence' and the `hue` column
    :param thr: y position of the red dashed line in the bottom panel
    :param min_epc: x position of the green dashed line in the bottom panel
    :param hue: column used to colour the series
    :param title: optional title of the top panel
    :param legend_hue: when true, also show a legend for the hue levels
    :return: the current matplotlib figure
    """
    sns.set_theme()
    convergence_markers = {'optima': 'o', 'stop': 'X', 'optima/stop': 'D'}
    flagged = df[df['convergence'] != '']

    # ---- top panel -------------------------------------------------------
    plt.subplot(211)
    if title:
        plt.title(title)
    sns.scatterplot(data=flagged, x='epoch', y='err_prior', hue=hue,
                    style='convergence', markers=convergence_markers,
                    s=100, legend=True)

    # Keep only the legend entries that name a convergence state; this also
    # drops the legend section titles.
    handles, labels = plt.gca().get_legend_handles_labels()
    kept = [(handle, label) for handle, label in zip(handles, labels)
            if 'optima' in label or 'stop' in label]
    conv_leg = plt.legend([handle for handle, _ in kept],
                          [label for _, label in kept],
                          bbox_to_anchor=(1.0, 1.0), loc='upper right')

    top_ax = sns.lineplot(data=df, x='epoch', y='err_prior', hue=hue,
                          legend=legend_hue)
    if legend_hue:
        # The first `n_hue` legend entries are taken as the hue levels.
        n_hue = df[hue].unique().size
        handles, labels = plt.gca().get_legend_handles_labels()
        plt.legend(handles[:n_hue], labels[:n_hue],
                   bbox_to_anchor=(0.75, 1.), loc='upper right')
        # A 2nd call to legend() erases the first; re-adding it is currently
        # disabled:
        # top_ax.add_artist(conv_leg)
    top_ax.set(xlabel=None)
    plt.ylabel("Mean absolute error \n of priors")

    # ---- bottom panel ----------------------------------------------------
    plt.subplot(212)
    sns.lineplot(data=df, x='epoch', y='var_pseudo_neg', hue=hue,
                 legend=False)
    plt.axhline(y=thr, color='r', linestyle='--')
    plt.axvline(x=min_epc, color='g', linestyle='--')
    sns.scatterplot(data=flagged, x='epoch', y='var_pseudo_neg', hue=hue,
                    style='convergence', markers=convergence_markers,
                    s=100, legend=False)
    plt.ylabel("variance of\n pseudo negatives")

    return plt.gcf()
sns.barplot(data=four_most_popular, y='item_name', x='revenue')
plt.ylabel('')
plt.xticks()

# 5.) Load the sleepstudy data and read its documentation.
# Use seaborn to create a line chart of all the individual subjects' reaction
# times and a more prominent line showing the average change in reaction time.
sleep = data('sleepstudy')
# Show the documentation of the dataset that was actually loaded
# ('sleepstudy', not 'sleep').
data('sleepstudy', show_doc=True)
sleep.info()
sleep.tail(20)
sleep.describe()

sns.set_style('darkgrid')
sns.set_context(font_scale=1, rc={
    "grid.linewidth": 1,
    "axes.linewidth": 1,
    "ytick.major.width": 1,
    "xtick.major.width": 1,
    "lines.linewidth": 1
})

# One thin line per subject, coloured from an 18-colour palette.
palette = sns.color_palette("deep", 18)
sns.lineplot(x='Days', y='Reaction', hue='Subject', data=sleep, palette=palette)

# Thicker line for the aggregate over all subjects, without an error band.
sns.set_context(rc={"lines.linewidth": 4})
sns.lineplot(x='Days', y='Reaction', data=sleep, ci=None)
A = 1/(12 * K * (K * N)) B = (k + N - 1/2)/(K*(K+N)) func = lambda tau : -1/tau**2 + 2*A*tau + B taus = [] for j in [10,500,100]: tau_initial_guess = 1 tau_solution = fsolve(func, tau_initial_guess) taus.append(tau_solution) tau_solution = np.mean(taus) points.append({"k":k,"alpha":tau_solution,"N":N,"K":K}) pfx = pd.DataFrame(points) pfx['N'] = pfx.N.astype(str) pfx.to_csv("simulation_data.tsv",sep="\t") plt.savefig("simulation.pdf",dpi=300) pfx = pd.read_csv("simulation_data.tsv",sep="\t") sns.lineplot(pfx.K,pfx.alpha, label = "Simulated data") def fit_func(x, a, b): return a*np.power(x,2/3) + b params = curve_fit(fit_func, pfx.K, pfx.alpha) a, b = params[0] X_space = pfx.K Y_space = a * np.power(X_space,2/3) + b plt.plot(X_space,Y_space, label = r"$"+str(np.round(a,2))+r" \cdot K^{\frac{2}{3}} "+str(np.round(b,2))+r"$") plt.legend() plt.ylabel(r"$\gamma$") #plt.title(r"Fitting $\gamma$ estimate to the space of 250,000 networks") plt.savefig("simulation_final.pdf",dpi=300)
def plot_linear(k,X,y,I_pca,I_spearman,I_mRMR,I_hierarchy,I_sklearn,I_permutation_oob,I_dropcol_oob):
    """Plot linear-model MAE against the number of features for each ranking.

    For every importance ranking the MAE values come from
    ``mae_I_linear(X, y, sort_I(importance), k)`` and are drawn against
    ``1..k`` with a distinct marker.

    :param k: largest number of features on the x-axis
    :param X: feature data handed to ``mae_I_linear``
    :param y: target data handed to ``mae_I_linear``
    :param I_pca, I_spearman, I_mRMR, I_hierarchy, I_sklearn,
        I_permutation_oob, I_dropcol_oob: feature importances produced by the
        respective method
    """
    # (column suffix, importances, marker, legend label)
    rankings = (
        ('pca', I_pca, 'o', 'pca'),
        ('spearman', I_spearman, 'p', 'spearman'),
        ('mRMR', I_mRMR, '1', 'mRMR'),
        ('hierarchy', I_hierarchy, 's', 'hierarchy'),
        ('sklearn', I_sklearn, 'd', 'sklearn'),
        ('permutation_oob', I_permutation_oob, '*', 'permutation'),
        ('dropcol_oob', I_dropcol_oob, 11, 'dropcol'),
    )

    columns = {'k': np.arange(1, k + 1)}
    for suffix, importance, _, _ in rankings:
        # Every ranking, mRMR included, is scored with the linear model; the
        # mRMR column previously called `mae_I`, unlike all the others.
        columns['mae_' + suffix] = mae_I_linear(X, y, sort_I(importance), k)
    df_linear = pd.DataFrame(columns)

    for suffix, _, marker, label in rankings:
        # Explicit x=/y= keywords: newer seaborn releases reject positional
        # data vectors.
        sns.lineplot(x=df_linear['k'], y=df_linear['mae_' + suffix],
                     marker=marker, label=label)
    plt.ylabel('20% 5fold CV MAE ($)')
    plt.title('linear model')
sep="\t", names=["Ref", "Pos", "depth"]) sns.lmplot(x="POS", y="ALT_FREQ", data=rep_b, fit_reg=False) fig, ax1 = plt.subplots(figsize=(15, 5)) color = "tab:green" #isnv plot creation ax1.set_ylabel('iSNVs', color="blue", fontsize=20) ax1.tick_params(axis='y', labelcolor="blue") ax1 = sns.regplot(x="POS", y="ALT_FREQ", data=rep_b, ax=ax1, palette='summer') #specify we want to share same x axis ax2 = ax1.twinx() color = 'tab:red' # lineplot creation for the depth sns.lineplot(x='Pos', y='depth', data=japan1_depth, ax=ax2) ax2.set_ylabel('Depth', color="red") ax2.tick_params(axis='y', labelcolor="red") plt.show() fig, ax1 = plt.subplots() ax1.set_xlabel('Position') ax1.set_ylabel('iSNVs', color="blue") ax1.tick_params(axis='y', labelcolor="blue") rep_a.plot(x="POS", y="ALT_FREQ", label="1_peru", ax=ax1, kind="scatter", color="blue",
# Sets figure parameters
plt.figure(figsize=(6,5))
plt.rcParams.update({
    'axes.titlesize': 13,
    'axes.labelsize': 13,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
    'legend.fontsize': 12,
})
plt.xlabel('Number of tasks (T)', fontsize=13)
plt.ylabel('Makespan (a.u.)', fontsize=13)
# Ticks every 1000 tasks from 1000 to 10000, slightly rotated.
plt.xticks(range(1000,10001,1000), rotation=15)
plt.ylim(0, 12000)

# Makespan per scheduler for the runs with 10 resources.
sns.lineplot(data=recursive_results[recursive_results.Resources == 10],
             x='Tasks', y='Makespan', hue='Scheduler',
             hue_order=order, linewidth=2)
plt.savefig("s1-recursive-10.pdf", bbox_inches='tight')


# In[ ]:


# Sets figure parameters
plt.figure(figsize=(6,5))
plt.rcParams.update({
    'axes.titlesize': 13,
    'axes.labelsize': 13,
    'xtick.labelsize': 13,
    'ytick.labelsize': 13,
})
fn, columns=["year_season", "policy_snapshot_url", "match_str"]) cache[identifier] = results_df return results_df def plot_term(df, term, regex=False, case=False, min_year=DEFAULT_MIN_YEAR, save_figure=True): plt.figure(figsize=(10, 5)) percentages = search_term(df, term, regex, case, min_year) fig = sns.lineplot(x="interval", y="percentage", data=percentages) fig.set_xticklabels(fig.get_xticklabels(), rotation=45, fontsize='small') if not final: title = "Query: %s\n(Min-year: %s, Regex: %s, Case sensitive: %s)" % ( term, min_year, regex, case) fig.set_title(title) plt.ylim(ymin=0) if save_figure: s_fig = fig.get_figure() s_fig.savefig("figures/%s_%s_%s_%s.png" % (term, min_year, regex, case), bbox_inches='tight') return fig
all_recalls[model] = mean_recall * 100 all_maps[model] = np.mean(maps) * 100 if APPROXIMATE: print(all_recalls[model]) print(len(all_recalls[model])) if len(all_precisions[model]) > 1000: last_precision = all_precisions[model][-1] last_recall = all_recalls[model][-1] all_precisions[model] = all_precisions[model][0::50] all_recalls[model] = all_recalls[model][0::50] all_precisions[model] = np.append(all_precisions[model], last_precision) all_recalls[model] = np.append(all_recalls[model], last_recall) print(all_recalls[model]) print(len(all_recalls[model])) sns.lineplot(all_precisions[model], all_recalls[model], label=str(model_print_name), linewidth=3) plt.xlim([0.0, 100]) plt.ylim([0.0, 105]) plt.title("WHEN: Precision-recall plot") plt.legend(loc="lower left") sns.despine() plt.savefig("./figures/roc_legend.png") plt.xlim([0.0, 100]) plt.ylim([0.0, 105]) plt.title("WHEN: Precision-recall plot") plt.legend(loc="lower left") ax.get_legend().remove() sns.despine() plt.savefig("./figures/roc_when.png")
def plot(self, df_in, output_prefix, output_dir, dpi, force, show, verbose):
    """Render this plot configuration from ``df_in``, then save and/or show it.

    :param df_in: data frame holding the columns named by the configured keys
    :param output_prefix: prefix joined with ``--`` to the output file name
    :param output_dir: directory to write the figure to; ``None`` skips saving
    :param dpi: resolution passed to ``savefig``
    :param force: overwrite an already existing output file when true
    :param show: call ``plt.show()`` after plotting when true
    :param verbose: print the output path before writing when true
    :return: ``True`` on success; ``False`` when the (filtered) data is
        empty, the output file exists and ``force`` is false, or writing
        the figure fails
    :raises Exception: if ``self.plot_type`` is neither ``"lineplot"`` nor
        ``"scatterplot"``
    """
    df = df_in

    # Set a filename if needed.
    if self.filename is None:
        self.filename = f"{self.plot_type}--{self.xkey}--{self.ykey}--{self.huekey}--{self.stylekey}"

    # Use some seaborn defaults for fontsize etc., then the general style.
    sns.set_context(self.sns_context, rc={"lines.linewidth": 2.5})
    sns.set_style(self.sns_style)

    # Filter the data using pandas queries if required.
    if self.df_query is not None and len(self.df_query):
        df = df.query(self.df_query)

    # If the df is empty, skip.
    if df.shape[0] == 0:
        print(f"Skipping plot {self.filename} - 0 rows of data")
        return False

    # One palette colour per distinct hue value (a single colour without hue).
    huecount = len(df[self.huekey].unique()) if self.huekey is not None else 1
    palette = sns.color_palette(self.sns_palette, huecount)
    sns.set_palette(palette)

    # Single plot; constrained layout for better legend placement.
    fig, ax = plt.subplots(constrained_layout=True)
    fig.set_size_inches(FIGSIZE_INCHES[0], FIGSIZE_INCHES[1])

    # Generate labels / titles etc.
    xlabel = f"{pretty_csv_key(self.xkey)}"
    ylabel = f"{pretty_csv_key(self.ykey)}"
    huelabel = f"{pretty_csv_key(self.huekey)}"
    stylelabel = f"{pretty_csv_key(self.stylekey)}"
    hs_label = f"{huelabel} x {stylelabel}" if huelabel != stylelabel else f"{huelabel}"
    # `hs_label` is interpolated here; it used to appear as literal text
    # because the braces were missing.
    figtitle = f"{ylabel} vs {xlabel} ({hs_label})"

    # @todo - validate keys.

    # Decide if using internal legend.
    external_legend = self.legend_outside

    if self.plot_type == "lineplot":
        sns.lineplot(
            data=df,
            x=self.xkey,
            y=self.ykey,
            hue=self.huekey,
            style=self.stylekey,
            markers=True,
            dashes=True,
            ax=ax,
            legend=self.sns_legend,
            palette=palette,
        )
    elif self.plot_type == "scatterplot":
        sns.scatterplot(
            data=df,
            x=self.xkey,
            y=self.ykey,
            hue=self.huekey,
            style=self.stylekey,
            markers=True,
            ax=ax,
            legend=self.sns_legend,
            palette=palette,
        )
    else:
        raise Exception(f"Bad plot_type {self.plot_type}")

    # Setting a title is disabled for now; `figtitle` is kept for when it is
    # re-enabled (plt.title(figtitle)).

    # Adjust x axis if required.
    ax.set(xlabel=xlabel)
    if self.logx:
        ax.set(xscale="log")
    if self.minx is not None:
        ax.set_xlim(left=self.minx)
    if self.maxx is not None:
        ax.set_xlim(right=self.maxx)

    # Adjust y axis if required.
    ax.set(ylabel=ylabel)
    if self.logy:
        ax.set(yscale="log")
    if self.miny is not None:
        ax.set_ylim(bottom=self.miny)
    if self.maxy is not None:
        ax.set_ylim(top=self.maxy)

    # Disable scientific notation on axes. `ticklabel_format` only works
    # with the ScalarFormatter and raises AttributeError on a log-scaled
    # axis, so it is applied per axis and skipped for log axes.
    if not self.logx:
        ax.ticklabel_format(axis='x', useOffset=False, style='plain')
    if not self.logy:
        ax.ticklabel_format(axis='y', useOffset=False, style='plain')

    # If there is reason to have a legend, do some extra processing.
    if ax.get_legend() is not None:
        loc = None
        bbox_to_anchor = None
        if external_legend:
            # Set legend placement if not internal.
            loc = "upper left"
            # @todo - y offset should be LEGEND_BORDER_PAD transformed from
            # font units to bbox.
            bbox_to_anchor = (1, 1 - self.legend_y_offset)

        # Replace raw hue/style keys among the legend labels with their
        # pretty versions.
        handles, labels = ax.get_legend_handles_labels()
        found_count = 0
        for i, label in enumerate(labels):
            if label == self.huekey:
                labels[i] = huelabel
                found_count += 1
            elif label == self.stylekey:
                labels[i] = stylelabel
                found_count += 1

        # If neither was found, add a legend title as an invisible patch
        # carrying the label, like seaborn does when multiple values are
        # provided.
        if found_count == 0:
            handles.insert(
                0,
                mpatches.Rectangle((0, 0), 1, 1, fill=False,
                                   edgecolor='none', visible=False,
                                   label=hs_label))
            labels.insert(0, hs_label)

        ax.legend(handles=handles, labels=labels, loc=loc,
                  bbox_to_anchor=bbox_to_anchor,
                  borderaxespad=LEGEND_BORDER_PAD)

    # If an output directory is provided, save the figure to disk.
    if output_dir is not None:
        output_dir = pathlib.Path(output_dir)
        # Prefix the filename with the experiment prefix.
        output_filename = f"{output_prefix}--{self.filename}"
        output_filepath = output_dir / output_filename
        # Write only if the file does not exist yet or `force` is set.
        if not output_filepath.exists() or force:
            try:
                if verbose:
                    print(f"writing figure to {output_filepath}")
                fig.savefig(output_filepath, dpi=dpi, bbox_inches='tight')
            except Exception as e:
                print(
                    f"Error: could not write to {output_filepath} with exception {e}"
                )
                return False
        else:
            print(
                f"Error: {output_filepath} already exists. Specify a different `-o/--output-dir` or use `-f/--force`"
            )
            return False

    # If the show flag was set, show the plot.
    if show:
        plt.show()

    return True
from statsmodels.sandbox.predict_functional import predict_functional


# In[104]:


# Values at which the other covariates are held while "age" varies.
values = {"hist": 0, "tumorsize": 50, "accinsitu": 0, "lymphinv": 0}


# In[105]:


# The three results are used below as: fitted values, confidence band, and
# the values of the focus variable.
pr, cb, fv = predict_functional(result, "age", values=values,
                                ci_method="simultaneous")


# In[106]:


# x=/y= are given as keywords: newer seaborn releases reject positional
# data vectors.
ax = sns.lineplot(x=fv, y=pr, lw=4)
ax.fill_between(fv, cb[:, 0], cb[:, 1], color='grey', alpha=0.4)
ax.set_xlabel("age")
ax.set_ylabel("Re-excision")
ax.set_title('Fitted Model: Log-odd probability of Age by Re-excision')

#This plot of fitted log-odds visualizes the effect of age on reexcision for
#the covariate values fixed in `values` above (hist=0, tumorsize=50,
#accinsitu=0 and lymphinv=0) by the glm fitted model
#Slight negative correlation of age and RE are visible in this plot
#For the specific described variables


# In[ ]:


# In[100]:
bins=range( int(company.Low.min()) - 1, int(company.Low.max()) + 1), normed=True, rwidth=1) plt.xticks(range(int(company.Low.min()) - 1, int(company.Low.max()) + 1)) plt.xlabel("Lowest Prize for each day") plt.ylabel("Probability") plt.title("Company: " + name.split(".")[0], fontsize=20) plt.show() # In[18]: for name, i, company in zip(companies_9, range(9), data): plt.figure(figsize=(15, 5)) sns.lineplot(data=data[i], x='Date', y="Open", label="Opening Value") sns.lineplot(data=data[i], x='Date', y="Close", label="Closing Value") sns.lineplot(data=data[i], x='Date', y="High", label="Highest Value Per Day") sns.lineplot(data=data[i], x='Date', y="Low", label="Lowest Value Per Day") plt.legend() #plt.xticks([]) plt.xlabel("Time") plt.ylabel("Stock Prices") plt.title("Compnay: " + name.split(".")[0]) plt.show() # In[23]:
# Print the results data frame framed by horizontal rules.
separator = "=" * 72
print("")
print("")
print(separator)
print(" " * 3,
      "Average time taken to run each sorting algorithm in milliseconds")
print(separator)
print(df)
print(separator)

# Report the total run time of the program; totalStartTime is expected to
# have been recorded earlier in the script.
totalEndTime = time.time()
totalTimeElapsed = totalEndTime - totalStartTime
print("")
print("Program took ", round(totalTimeElapsed, 3), " seconds to run")

# Testing functions

# Save data in csv to plot graphs in a separate program quicker
#df.to_csv('sortingDataframe.csv')

# Plot the data frame with seaborn (markers on, solid lines) and label the axes.
plot = sns.lineplot(data=df, markers=True, dashes=False)
plot.set(xlabel='Array Size', ylabel='Time taken (milliseconds)')
#plt.yscale("log")  # Uncomment to graph log scale results for larger number of inputs
plt.show()  # Display the figure
plt.style.use("ggplot")
# plt.rcParams.update({'legend.fontsize': 14})

# Use only the first two colours of the current seaborn palette.
p = sns.color_palette()
sns.set_palette([p[0], p[1]])

# Single-panel figure; `axes` is therefore one Axes object, not an array.
f, axes = plt.subplots(1, 1, figsize=(width, height))

# Disabled two-panel variant (success rate / eval against samples):
# sns.lineplot(x='samples', y='success_rate', hue='algo', ax=axes[0], data=sr_timesteps)
# axes[0].set_xlabel('samples')
# axes[0].set_ylabel('success_rate')
# axes[0].get_legend().remove()
# sns.lineplot(x='samples', y='eval', hue='algo', ax=axes[1], data=eval_timesteps)
# axes[1].set_xlabel('samples')
# axes[1].set_ylabel('')
# axes[1].get_legend().remove()

# Mean episode length per iteration, one line per algorithm.
sns.lineplot(
    data=len_mean_iteration,
    x='iterations',
    y='len_mean',
    hue='algo',
    ax=axes,
)
axes.set(xlabel='iterations', ylabel='episode length')

# Remove the legend seaborn attached to the axes, then collect the handles
# and labels of the plotted lines.
axes.get_legend().remove()
handles, labels = axes.get_legend_handles_labels()

# Disabled mode-dependent variant:
# if mode == 'train':
#     sns.lineplot(x='samples', y='success_rate', hue='algo', data=sr_timesteps)
#     axes.set_xlabel('samples')
# elif mode == 'hard':
#     sns.lineplot(x='samples', y='eval', hue='algo', data=eval_timesteps)
#     axes.set_xlabel('samples')
# elif mode == 'iteration':
#     sns.lineplot(x='iterations', y='eval', hue='algo', ax=axes, data=eval_iteration)
#     axes.set_xlabel('iterations')
"bleu": [0, 0], "data_ord": [0, 0], "data": ["nejm.0", "nejm.0"], "direction": ["zh2en", "en2zh"], "train": ["de novo", "de novo"] }) bleu = pd.concat([zeros, bleu]) ord2num = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 5.46} bleu["x"] = bleu["data_ord"].apply(lambda x: ord2num[x]) plt.ion() fig, ax = plt.subplots(1, 1) g = sns.lineplot(x="x", y="bleu", hue="direction", data=bleu, legend="brief", style="train", markers=["o", "o"], dashes=[(2, 1), ""]) fig.set_size_inches(5, 4) fig.tight_layout() g.legend_.texts[0].set_position((-40, 0)) g.legend_.texts[0].set_text("Direction") g.legend_.texts[3].set_position((-40, 0)) g.legend_.texts[3].set_text("Training") ax.set_xlabel("In-Domain Sentence Pairs") ax.set_ylabel("1-ref BLEU") ax.set_xticks([0, 1, 2, 3, 4, 5, 5.46]) ax.set_xticklabels(["0", "4000", "8000", "16000", "32000", "64000", ""]) ax.legend() plt.savefig(f"{out_dir}/bleu.pdf")
kde_kws={'linewidth': 3}, label=race) # Plot formatting plt.legend(prop={'size': 16}, title='Race') plt.title('Victim Race and Median Income (2017 - 2018)') plt.xlabel('Median Income') plt.ylabel('Density') # line chart for victim race and income #sns.lineplot(x="timepoint", y="signal", data=fmri) sns.lineplot(x='Victim Descent', y='Median Income', data=la, markers=True, dashes=False) #plot formatting #plt.legend(prop={'size': 16}, title = 'Race') plt.title('Victim Descent and Median Income (2017 - 2018)') # line chart for victim race and income #sns.lineplot(x="timepoint", y="signal", data=fmri) sns.lineplot(x='Victim Age', y='Median Income', data=la) #plot formatting #plt.legend(prop={'size': 16}, title = 'Race')
""" Timeseries plots with error bands ================================= _thumb: .5, .45 """ import seaborn as sns sns.set(style="darkgrid") # Load an example dataset with long-form data fmri = sns.load_dataset("fmri") # Plot the responses for different events and regions sns.lineplot(x="timepoint", y="signal", hue="region", style="event", data=fmri)
df_acc = pd.DataFrame(index=ary_epochs)

# For every entry of lst_evnt_trn, add a training and a validation column
# to both the loss frame and the accuracy frame; the column names carry the
# matching suffix from lst_names.
for ind, _ in enumerate(lst_evnt_trn):
    suffix = lst_names[ind]
    # loss curves
    df_loss['trn_loss' + suffix] = lst_trn_lss[ind]
    df_loss['val_loss' + suffix] = lst_val_lss[ind]
    # accuracy curves
    df_acc['trn_acc' + suffix] = lst_trn_acc[ind]
    df_acc['val_acc' + suffix] = lst_val_acc[ind]

# Loss figure: drawn on a fresh axes and written out as both SVG and PNG.
fig, ax = plt.subplots()
fig.set_size_inches(17.5, 12.5)
sns.lineplot(
    data=df_loss,
    palette=lst_colors,
    dashes=lst_dashes,
    linewidth=2.5,
    ax=ax,
)
ax.set_xlabel("Number of Epochs")
ax.set_ylabel("Loss")
fig.savefig(
    "/media/sf_D_DRIVE/Unet/presentation/results/plots/loss_model_weighted.svg"
)
fig.savefig(
    "/media/sf_D_DRIVE/Unet/presentation/results/plots/loss_model_weighted.png"
)

# Accuracy figure: same layout on a second, new axes.
fig, ax = plt.subplots()
fig.set_size_inches(17.5, 12.5)
sns.lineplot(
    data=df_acc,
    palette=lst_colors,
    dashes=lst_dashes,
    linewidth=2.5,
    ax=ax,
)
ax.set_xlabel("Number of Epochs")
def plot(self, data=None, **fargs):
    """Melt a frame on ``timeStamp`` and hand it to ``sns.lineplot``.

    :param data: frame to plot; when ``None`` the instance's ``self.df``
        is used instead.
    :param fargs: extra keyword arguments forwarded unchanged to
        ``sns.lineplot``.

    The frame is reshaped to long form with ``timeStamp`` as the id
    column, the former column names in ``cols`` and their values in
    ``vals`` before being plotted.
    """
    frame = self.df if data is None else data
    long_form = frame.melt("timeStamp", var_name="cols", value_name="vals")
    sns.lineplot(data=long_form, **fargs)
pass test_scorecard_array = np.asarray(test_scorecard) test_performance.append(test_scorecard_array.sum() / test_scorecard_array.size) print("testing performance = ", test_scorecard_array.sum() / test_scorecard_array.size) pass # plot performance sns.set(rc={'figure.figsize': (15, 10)}) performance_x = np.vstack((train_performance, test_performance)).T performance_x_df = pd.DataFrame(performance_x).reset_index() df = performance_x_df.melt('index', var_name='train/test', value_name='vals') df['train/test'] = df['train/test'].map({0: 'train', 1: 'test'}) performance_x_plot = sns.lineplot(x="index", y="vals", hue='train/test', data=df) performance_x_plot.set(xlabel='EPOCHS', ylabel='PERFORMANCE') plt.savefig('performance_x.png') # confusion matrix a_x = pd.Series(np.asarray(actual)) p_x = pd.Series(np.asarray(predicted)) confusion_matrix_x = pd.crosstab(a_x, p_x) confusion_matrix_x # for experiment 2... # shuffle and sample quarter of the training data np.random.shuffle(train_data) quarter_train_data = train_data[:15000]
sns.get_dataset_names()

# ## lineplot

# In[61]:

# Example dataset used by every plot below.
fmri = sns.load_dataset("fmri")

# In[20]:

fmri.head()

# In[33]:

# err_style="band" requested explicitly.
ax = sns.lineplot(data=fmri, x="timepoint", y="signal", err_style="band")

# In[28]:

# Same plot with err_style="bars".
ax = sns.lineplot(data=fmri, x="timepoint", y="signal", err_style="bars")

# In[63]:

# Two calls with different `ci` levels (95 and 68) and different colours.
ax = sns.lineplot(data=fmri, x="timepoint", y="signal", ci=95, color="m")
ax = sns.lineplot(data=fmri, x="timepoint", y="signal", ci=68, color="b")

# In[35]:

# ci='sd' instead of a numeric level.
ax = sns.lineplot(data=fmri, x="timepoint", y="signal", ci='sd', color="m")

# In[37]:
import seaborn as sns data = pd.read_csv("Processed-text.csv", low_memory=False, index_col=[2], parse_dates=[2]).sort_index() data.drop(columns=['Unnamed: 0', 'body'], inplace=True, axis=1) #Sales Distribution basis on comment monthly_order_count = data.resample('M').count() #orders_monthly.dropna(how='any',inplace=True) monthly_order_count = pd.DataFrame(monthly_order_count) #plt.plot_date(x=orders_monthly.index,y=orders_monthly['rating'],fmt='o') monthly_order_count.rename(columns={'rating': 'Sales'}, inplace=True) ax = sns.lineplot(x=monthly_order_count.index, y=monthly_order_count['Sales'], data=monthly_order_count, markers=True) #Overall sentiment over time period orders_monthly = data.resample('M').median() orders_monthly = pd.DataFrame(orders_monthly) ax = sns.lineplot(x=orders_monthly.index, y=orders_monthly['rating'], data=orders_monthly, markers=True) import plotly.graph_objs as go import plotly.offline as py actual_chart = go.Scatter(x=orders_monthly.index, y=orders_monthly['Sales'],
# Rate of change of water level h per second between consecutive rows.
df.loc[:, 'dh'] = df.loc[:, 'h'].diff() / pd.Timedelta(dt).total_seconds()

# The loop starts at the second timestamp because every step reads the
# previous row (t - dt); it therefore runs len(...) - 1 times, which is the
# total reported in the progress message.
time_step = pd.Timedelta(dt)  # loop-invariant, computed once
n_steps = len(tides_rep_with_slr) - 1
step = 1
for t in tides_rep_with_slr.index[1:]:
    t_min_1 = t - time_step
    # Each quantity is derived from the previous row and/or the values
    # already written for the current row, so the order below matters.
    df.loc[t, 'z'] = calc_z(df.at[t_min_1, 'z'], df.at[t_min_1, 'dz'], 0, 0)
    df.loc[t, 'C0'] = calc_c0(
        df.at[t, 'h'], df.at[t, 'dh'], df.at[t, 'z'], A, SSC)
    df.loc[t, 'C'] = calc_c(df.at[t, 'C0'], df.at[t, 'h'],
                            df.at[t_min_1, 'h'], df.at[t, 'dh'],
                            df.at[t_min_1, 'C'], df.at[t, 'z'], ws, dt_sec)
    df.loc[t, 'dz'] = calc_dz(df.at[t, 'C'], ws, rho, dt_sec)
    print('Completed step {0} of {1}.'.format(step, n_steps))
    step = step + 1

# %%
# The seaborn style must be set before the axes are created; style
# parameters only apply to axes created afterwards.
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(5, 6))

# The plotted time axis is shifted by a fixed 1326-day offset; the same
# offset is applied to the x-limits below.
plot_offset = pd.Timedelta(days=1326)
plt_data = df.shift(freq=plot_offset)
plt_data = plt_data[::10]  # plot every 10th row only

sns.lineplot(data=plt_data.h, alpha=0.25, ax=ax)
sns.lineplot(data=plt_data.z, color='black', ax=ax)
plt.title("Tidal Heights and Land Surface")
plt.xlabel("Year")
plt.ylabel("Height above Mean Water Level (m)")
plt.xlim(start + plot_offset, rep_end + plot_offset)
plt.show()

#%%
#tides_rep.tail()