Example No. 1
def plot(df: pd.DataFrame, id_vars: list, value_vars: list):
    """
    Plots a data frame.

    :param df: dataframe to plot
    :type df: pd.DataFrame
    :param id_vars: list of column names for the x-axis
    :type id_vars: list or str
    :param value_vars: list of column names for the y-axis
    :type value_vars: list
    :return: melted data frame
    :rtype: pd.DataFrame
    """
    if not isinstance(id_vars, list):
        id_vars = [id_vars]
    data = pd.melt(df, id_vars=id_vars, value_vars=value_vars)

    sns.lineplot(x=id_vars[0], y="value",
                 hue="variable",
                 data=data)
    return data
def generate_plot(data: pd.DataFrame, output_path: str) -> None:
    """
    Generates an image plot for the given dataframe.

    :param data: The dataframe to generate the plot for
    :param output_path: The file output path
    """
    palette = {'L': '#875438', 'C': '#0E0C0C', 'U': '#8A8D91', 'R': '#C1A15B', 'M': '#EC7802'}
    ax = sns.lineplot(data=data, palette=palette, linewidth=1.5, hue='A')
    ax.set(ylabel='Average Proportion of Deck')
    fig = ax.figure

    fig.savefig(output_path)
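A minimal usage sketch for the `plot` helper above; the frame and its column names here are hypothetical, not from the original code.

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Hypothetical wide-format frame: one x column plus two value columns.
df_demo = pd.DataFrame({
    "day": [1, 2, 3, 4],
    "metric_a": [0.1, 0.4, 0.3, 0.6],
    "metric_b": [0.2, 0.1, 0.5, 0.4],
})
# id_vars may be a single string; the helper wraps it in a list before melting.
melted = plot(df_demo, id_vars="day", value_vars=["metric_a", "metric_b"])
plt.show()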
df['counts'] = df.groupby('datetime', as_index=False)['datetime'].transform(lambda s: s.count())

x = []
y = []
for point in df['sentiment']:
    x.append(point[0])
    y.append(point[1])

plt.figure(figsize=(10, 10))
plt.scatter(x, y, alpha=0.5)
plt.title("Sentiment:Polarity vs. Subjectivity")
plt.xlabel("Polarity")
plt.ylabel("Subjectivity")
plt.show()

plt.figure(figsize=(10, 10))
sns.lineplot(x="datetime", y="counts", data=df)
plt.xticks(rotation=15)
plt.title('Count of Tweet Times By Minute')
plt.xlabel('Time(Minutes)')
plt.ylabel('Count')
plt.show()

df['datetime'] = pd.to_datetime(df['datetime'], format='%m/%d/%y %H:%M').dt.hour
plt.figure(figsize=(10, 10))
sns.lineplot(x="datetime", y="counts", data=df)
plt.xticks(rotation=15)
plt.title('Count of Tweet Times By Hour')
plt.xlabel('Time(Hours)')
plt.ylabel('Count')
plt.show()
def nucleotide_difference_imbalance_plot_stylized_like_Figure_8_of_Morrill_et_al_2016(
    sequence_file, chr_, position_corresponding_to_first_nt,
    chunk_size = chunk_size, overlap_specified = overlap_specified,
    save_vg=False, return_fig=False):
    '''
    Main function of script. 

    Make a plot figure like Figure 8, panel B of Morrill et al 2016 
    (PMID: 27026700), see  
    https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4882425/figure/F8/ .

    Inputs:
    - sequence file spanning the desired coordinates in FASTA format with a 
    single sequence in it. (Or at least the first sequence should be the one 
    to use.)
    - text string designation to use for the chromosome/contig/scaffold/region 
    in plot labels
    - position corresponding to the first base in the provided sequence, as an 
    integer. Used to label positions along the x-axis of the plot.

    Optionally you can provide integer settings for the `chunk_size` and 
    `overlap_specified` for the analysis windows. Without specifying them, by 
    default they are set to mirror Figure 8, panel B of Morrill et al 2016 
    (PMID: 27026700).

    You can set `return_fig` to return a plot figure object, which is useful 
    when calling from a Jupyter notebook or IPython. If you assign it to `ax` 
    in your notebook, you can redisplay it in another cell via `ax.figure`.

    Saving as vector graphics is also an option. It is not the default because 
    PNG-style image files are more familiar to most folks and more convenient 
    in Jupyter notebooks.

    Saves an image file of the plot, or optionally, returns a plot object.
    '''

    # Retrieve sequence from sequence file and break it up into the set chunks
    #--------------------------------------------------------------------------- 
    seq_entries = Fasta(sequence_file)
    seq = seq_entries[0] # assume first one is the one to be used
    chunks = (
        list(gen_chunk_string_with_different_step(seq,chunk_size,step_size)))
    #discard any chunks at end less than the size of the set window
    chunks = [x for x in chunks if len(x)== chunk_size]


    # Assign midpoint positions to each chunk based on chunk_size, start position
    # input, and provided sequence length.
    # ALSO, while going through chunks, might as well do the calculation too:
    # Calculate G vs. C and A vs. T that I think might give results like Figure 8 
    # of Morrill et al 2016 (PMID: 27026700)
    #---------------------------------------------------------------------------
    # determining midpoints for each chunk (relative to length of provided sequence) 
    # and assigning to a list. (used a dictionary in development but a list
    # should be better for python 2.7 compatibility)
    # Had calculation as a separate step in development but why iterate again.
    # (Doing similar thing where now storing `chunks_diffs` as a list and 
    # not a dictionary for python 2.7 compatibility.)
    import sys
    #chunks_midpoints = {} #key will be index of chunk in chunks
    chunks_midpoints = []
    #chunks_diffs = {} #key will be index of chunk in chunks. value will be tuple 
    # with each diff as an item
    chunks_diffs = []
    for indx,chunk in enumerate(chunks):
        # handle first chunk without much fanfare because easy calculation and
        # doesn't depend on a previous one
        if indx == 0 and len(chunk)== chunk_size:
            #chunks_midpoints[indx] = chunk_size/2
            chunks_midpoints.append(chunk_size/2)
        elif len(chunk)== chunk_size:
            start_curr_chunk = step_size * indx
            end_curr_chunk = step_size * indx + chunk_size
            #chunks_midpoints[indx]= midpoint((start_curr_chunk,end_curr_chunk)) 
            chunks_midpoints.append(midpoint((start_curr_chunk,end_curr_chunk)))
        else:
            sys.stderr.write("\n\nError? Issue with size of sequence chunks "
                "not matching expected/\n") #shouldn't happen
            sys.exit(1)
        GC_diff = calc_nt_diff("GC",str(chunk)) # casting 'Sequence' object to 
        # string with `str(chunk)`
        AT_diff = calc_nt_diff("AT",str(chunk)) # casting 'Sequence' object to 
        # string with `str(chunk)`
        #chunks_diffs[indx] = (GC_diff,AT_diff)
        chunks_diffs.append((GC_diff,AT_diff))

    # Adjust positions of midpoints to account for the first base in the sequence 
    # file not being the first base of the chromosome
    #---------------------------------------------------------------------------
    # correct chunks_midpoints to take into account that the first position in 
    # the provided sequence might not be the first position of that sequence along 
    # the chromosome, so that labels for the x-axis will match the situation
    # (This was done with a dictionary comprehension in development where a
    # dictionary was used)
    start_pos = position_corresponding_to_first_nt
    #chunks_midpoints = {k:v+start_pos for k,v in chunks_midpoints .items()}
    chunks_midpoints = [x+start_pos for x in chunks_midpoints]




    '''Combined into first iteration through list of chunks above
    #Calculate G vs. C and A vs. T that I think might give results like Figure 8 
    # of Morrill et al 2016 (PMID: 27026700)
    #---------------------------------------------------------------------------
    chunks_diffs = {} #key will be index of chunk in chunks. value will be tuple 
    # with each diff as an item
    for indx,seq in enumerate(chunks):
        GC_diff = calc_nt_diff("GC",str(seq)) # casting 'Sequence' object to 
        # string with `str(seq)`
        AT_diff = calc_nt_diff("AT",str(seq)) # casting 'Sequence' object to 
        # string with `str(seq)`
        chunks_diffs[indx] = (GC_diff,AT_diff)
    '''


    #Make the plot
    #---------------------------------------------------------------------------
    sns.set()
    plt.figure(figsize=default_plt_image_size)
    #indx = list(chunks_midpoints.values())
    #data = list(chunks_diffs.values())
    #indx and data were originally organized in dictionaries during development
    # but for better compatibility with Python 2.7 lists were used so order
    # maintained
    #df = pd.DataFrame(data, indx, ["CvsG", "TvsA"])
    df = pd.DataFrame(chunks_diffs, chunks_midpoints, ["G-C", "A-T"]) # went 
    # with what is used in the Morrill et al. figure for line labels even though 
    # `["CvsG", "TvsA"]` is more descriptive of what is being plotted.
    ax = sns.lineplot(data=df)
    ax.set_ylabel(yaxis_label, fontsize = 16);
    ax.set_xlabel(xaxis_label_prefix+chr_, fontsize = 16);
    ax.legend(fontsize= 12);



    # Return plot figure object (meant for calling from Jupyter cell or IPython)
    # or save to file.
    #---------------------------------------------------------------------------
    if return_fig:
        sys.stderr.write("Plot figure object returned.")
        return ax
    else:
        #save image; standard for when called from command line; however, will
        # also be default for when called from Jupyter notebook / IPython unless 
        # called with `return_fig=True` 
        output_file_name = generate_output_file_name(sequence_file, 
            suffix_for_saving_plot)
        if save_vg:
            plt.savefig(output_file_name[:-4]+".svg", 
            orientation='landscape') # FOR VECTOR GRAPHICS; useful if merging 
            # into Adobe Illustrator. Based on 
            # https://neuroscience.telenczuk.pl/?p=331 ; I think ReportLab also 
            # outputs SVG?
            sys.stderr.write("\nPlot image saved to: {}\n".format(
                output_file_name[:-4]+".svg"))
        else:
            # save png
            plt.savefig(output_file_name)
            sys.stderr.write("\nPlot image saved to: {}\n".format(
                output_file_name))
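A hedged example call based on the docstring above; the FASTA path, label, and coordinate are placeholders, and the module-level helpers and defaults the function relies on (`Fasta`, `calc_nt_diff`, `chunk_size`, `overlap_specified`, etc.) are assumed to already be defined in the surrounding script.

# Placeholder inputs: a single-sequence FASTA spanning the region of interest
# and the chromosomal position of its first base.
ax = nucleotide_difference_imbalance_plot_stylized_like_Figure_8_of_Morrill_et_al_2016(
    "region_of_interest.fa",   # placeholder FASTA file name
    "chrV",                    # label used in the x-axis title
    100000,                    # position of the first base in the file
    return_fig=True)           # return the plot object instead of saving
# In a notebook, `ax.figure` redisplays the returned figure in another cell.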
Example No. 5
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == '__main__':
    fmri = sns.load_dataset("fmri")
    ax = sns.lineplot(x="timepoint", y="signal", data=fmri)
    plt.show()
Example No. 6
# print(df_orig['del_rate'])

print(
    "median error rate/subs/ins/del original:", median_error
)  #, 100*df_orig['subs_rate'].median(), 100*df_orig['ins_rate'].median(), 100*df_orig['del_rate'].median())

# g = sns.catplot(x="abundance_original", y="abundance_corrected", col="Depth", col_wrap=3,
#             data=indata, kind="point", aspect=1)
# ax = sns.scatterplot(x="abundance_original", y="abundance_corrected", #col="Depth", # col_wrap=3, #hue="time",
#                       data=indata)
# g = sns.lmplot(x="transcript_cov", y="err_rate",  hue="type", scatter= False, x_estimator = np.mean,  #x_jitter = 0.1, #col="Depth",
#                 x_ci= 'sd', data=indata) #, col_wrap=2, height=3)

ax = sns.lineplot(x="transcript_cov",
                  y="err_rate",
                  hue="type",
                  ci='sd',
                  estimator='median',
                  data=indata)
ax.set_ylim(0, 12)
ax.set_xscale('log')
ax.set_xticks(
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
ax.set_ylabel("Error rate (%)")
ax.set_xlabel("Reads per transcript")

# g.set(ylim=(0,100))
# ax.set_ylabel("Abundance after correction")
# ax.set_xlabel("Abundance before correction")

# g.set_ylabels("Error rate (%)")
# g.set_xlabels("Reads per transcript")
Example No. 7
def lineplot_per_interval(df,
                          y,
                          hue,
                          y_title="",
                          start_year=DEFAULT_MIN_YEAR,
                          save_to_file=True,
                          filetype="pdf",
                          legend_loc="upper right",
                          hue_order=None,
                          legend_title="Snapshot rank",
                          estimator=np.median):
    fig, ax = plt.subplots()
    if not y_title:
        if hue:
            y_title = hue.title()
        else:
            y_title = ""

    # filter by start year
    df = df[df.year_season >= str(start_year)]

    set_plot_params()

    if hue:
        try:
            fig = sns.lineplot(data=df,
                               y=y,
                               x=INTERVAL_COL_NAME,
                               hue=hue,
                               style=hue,
                               hue_order=hue_order,
                               estimator=estimator,
                               ci=95)
        except Exception:
            markers = list(range(1, len(hue_order) + 1))
            print(markers)
            fig = sns.lineplot(data=df,
                               y=y,
                               x=INTERVAL_COL_NAME,
                               style=hue,
                               hue=hue,
                               dashes=False,
                               markers=markers,
                               hue_order=hue_order)
        set_legend(ax, legend_title, legend_loc)
        plt.setp(fig.get_legend().get_texts(),
                 fontsize='10')  # for legend text
        plt.setp(fig.get_legend().get_title(),
                 fontsize='10')  # for legend title

    else:
        fig = sns.lineplot(data=df,
                           y=y,
                           x=INTERVAL_COL_NAME,
                           estimator=estimator)

    set_x_ticks(df)

    #fig.set(xlabel='Interval', ylabel=y_title)
    ax.set_xlabel('Interval')
    ax.set_ylabel(y_title, fontsize=10)

    # fig.set_title(y_title + " per interval")
    fig_filename = "lineplot_%s.%s" % ("_".join(
        y_title.replace("%", "pct").replace("(", "").replace(
            ")", "").lower().split()), filetype)
    if save_to_file:
        save_figure(fig, fig_filename)
    return fig
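A hedged usage sketch for `lineplot_per_interval`; the column names and values below are hypothetical, and it assumes `df` carries the `year_season` column plus the interval column (`INTERVAL_COL_NAME`) and that the module-level helpers the function calls (`set_plot_params`, `set_legend`, `set_x_ticks`, `save_figure`) are available.

fig = lineplot_per_interval(df,
                            y="pct_changed",        # hypothetical metric column
                            hue="snapshot_rank",    # hypothetical grouping column
                            y_title="% of policies changed",
                            hue_order=["first", "last"],
                            save_to_file=False)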
Example No. 8
try:
    has_latest_model = True
    predictions_latest, _ = dataset.predict(df, './models/latest', True)
    print('Mean error latest : ' + str(dataset.mean_error(np.array(predictions_latest), y)))
    success_rate_latest = dataset.get_trend_success_rate(predictions_latest, y, df)
    print('Trend prediction success for latest : ' + str(success_rate_latest) + '%')
except:
    has_latest_model = False
    predictions_latest = None
    pass


print('Mean error reference : ' + str(dataset.mean_error(np.array(predictions_reference), y)))
success_rate_reference = dataset.get_trend_success_rate(predictions_reference, y, df)
print ('Trend prediction success for reference : ' + str(success_rate_reference) + '%')


figure(num=None, figsize=(24, 10), dpi=80, facecolor='w', edgecolor='k')

ax = sns.lineplot(x=predictions_reference.index, y=y, label="Test Data", color='blue')
ax = sns.lineplot(x=predictions_reference.index, y=predictions_reference[0], label="Prediction reference", color='gray')

if has_latest_model:
    ax = sns.lineplot(x=predictions_reference.index, y=predictions_latest[0], label="Prediction latest", color='red')

ax.set_title('Price', size = 14, fontweight='bold')
ax.set_xlabel("Hours", size = 14)
ax.set_ylabel("Cost", size = 14)
ax.set_xticklabels('', size=10)

plt.show()
Example No. 9
inertia = {}
homogeneity = {}

# use kmeans to loop over candidate number of clusters
# store inertia and homogeneity score in each iteration
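A minimal setup sketch for the loop below, assuming scikit-learn and synthetic labelled data; none of these imports or variables appear in the original snippet.

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import homogeneity_score
import seaborn as sns

# Synthetic labelled blobs so both the inertia and the homogeneity score have data to work on.
x, y = make_blobs(n_samples=300, centers=3, random_state=0)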

for k in range(1, 5):
    km = KMeans(n_clusters=k)
    pred = km.fit_predict(x)
    inertia[k] = km.inertia_
    homogeneity[k] = homogeneity_score(y, pred)

# %%
ax = sns.lineplot(
    x=list(inertia.keys()),
    y=list(inertia.values()),
    color="blue",
    label="inertia",
    legend=None,
)
ax.set_ylabel("inertia")
ax2 = ax.twinx()
sns.lineplot(
    x=list(homogeneity.keys()),
    y=list(homogeneity.values()),
    color="red",
    label="homogeneity",
    legend=None,
    ax=ax2,
)
ax2.set_ylabel("homogeneity")
ax.figure.legend()
daily_data.dtypes
dfNW56Articles.reset_index(inplace=True, drop=True)  #reset index

#merge
myArticles = pd.merge(dfNW56Articles, daily_data, on='date', sort=True)
myArticles.describe()  # 95 articles in total.
# Save myArticles to csv; could be useful for other articles.
myArticles.to_csv("myArticles.csv", sep=";", index=False)  # separator ;

##########################################################################
# Plots of the events and of the article publications
#   over the whole period
##########################################################################
sns.set()  # aesthetic defaults; resembles ggplot by default.
fig, ax = plt.subplots()  # a single plot
sns.lineplot(x='date', y='pageviews', data=daily_data, color='grey', alpha=0.2)
sns.scatterplot(x='date',
                y='pageviews',
                data=myOutliers,
                color='red',
                alpha=0.5)
sns.scatterplot(x='date',
                y='pageviews',
                data=myArticles,
                color='blue',
                marker="+")
fig.suptitle(str(len(myOutliers)) + " events (red dots) for " +
             str(len(myArticles)) + " articles (blue crosses): ",
             fontsize=14,
             fontweight='bold')
ax.set(
Example No. 11
train_df['year'] = train['date'].dt.year
train_df['month'] = train['date'].dt.month
train_df['day'] = train['date'].dt.dayofyear
train_df['weekday'] = train['date'].dt.weekday

train_df.head()

# --------------------------------------------------------------------------------------------------
# ---- Time series decomposition
# to begin, we need to decompose the series to study:
# -- the seasonality
# -- the trend
# -- the residuals
# since we have 5 years of data we expect a yearly or weekly seasonality

sns.lineplot(x="date", y="sales", legend='full', data=train_df)
# from the plot above we can see an increasing trend and a yearly seasonality
# it seems that sales peak in the middle months of the year

sns.lineplot(x="date", y="sales", legend='full', data=train_df[:28])
# from the plot above we do not clearly see a weekly seasonality

sns.boxplot(x="weekday", y="sales", data=train_df)
# from this plot (above) we can see that, on average, there is a slight increasing trend
# in sales within the week
# Monday = 0 ---- Sunday = 6
# we can also see that sales on weekdays are lower than on the weekend

# create train_df
train_df = train_df.set_index('date')
train_df['sales'] = train_df['sales'].astype(float)
                                    temp[i, j]
                                ])

                    hv_df = pd.DataFrame(hv_progress_df,
                                         columns=[
                                             'Evaluations', 'Runs',
                                             'Approaches', 'HV (Underlying)'
                                         ])

                    print(hv_df)
                    color_map = plt.cm.get_cmap('viridis')
                    color_map = color_map(np.linspace(0, 1, 5))
                    ax = sns.lineplot(x="Evaluations",
                                      y="HV (Underlying)",
                                      hue="Approaches",
                                      style="Approaches",
                                      markers=True,
                                      dashes=False,
                                      data=hv_df,
                                      palette=color_map)
                    #box = ax.get_position()
                    handles, labels = ax.get_legend_handles_labels()
                    #ax.legend(handles=handles, labels=labels)
                    ax.legend(handles=handles[1:],
                              labels=labels[1:],
                              frameon=False,
                              loc='upper center',
                              bbox_to_anchor=(0.5, 1.15),
                              ncol=5)
                    #plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

                    #ax.set_title(prob + ', ' + str(obj) + ' objs, ' + sx)
# finding outliers of purchases
sns.boxplot(cc_clean['purchases'], color='#fdc029')
plt.show()

# finding outliers of cash_advance
sns.boxplot(cc_clean['cash_advance'], color='#fdc029')
plt.show()

# finding outliers of payments
sns.boxplot(cc_clean['payments'], color='#fdc029')
plt.show()

# plotting balance and different purchase options 
plt.figure(figsize=(10,4))
sns.lineplot(cc_clean['balance'],cc_clean['purchases'],label="Purchases")
sns.lineplot(cc_clean['balance'],cc_clean['oneoff_purchases'],label='Oneoff')
sns.lineplot(cc_clean['balance'],cc_clean['installments_purchases'],label='Install')
plt.show()

sns.pairplot(cc_clean)

"""# Hierarchical clustering"""

# using standardscaler to reduce the distance of each variable
sc = StandardScaler()
xs = sc.fit_transform(cc_clean)
X = pd.DataFrame(xs, index=cc_clean.index, columns=cc_clean.columns)

# hclust
# going to do euclidean, cosine and cityblock distance
Example No. 14
def do_plot(df, thr, min_epc, hue='dset', title='', legend_hue=False):
    sns.set_theme()
    scrtmarkers = {'optima': 'o', 'stop': 'X', 'optima/stop': 'D'}
    plt.subplot(211)
    if title:
        plt.title(title)

    gsc = sns.scatterplot(data=df[df['convergence'] != ''],
                          x='epoch',
                          y='err_prior',
                          hue=hue,
                          markers=scrtmarkers,
                          s=100,
                          style='convergence',
                          legend=True)
    # get current handles and labels
    current_handles, current_labels = plt.gca().get_legend_handles_labels()

    # remove title
    t_ = [(h, l) for h, l in zip(current_handles, current_labels)
          if (('optima' in l) or ('stop' in l))]
    current_handles = [t[0] for t in t_]
    current_labels = [t[1] for t in t_]
    conv_leg = plt.legend(current_handles,
                          current_labels,
                          bbox_to_anchor=(1.0, 1.0),
                          loc='upper right')

    g = sns.lineplot(data=df,
                     x='epoch',
                     y='err_prior',
                     hue=hue,
                     legend=legend_hue)
    if legend_hue:

        # get handles of "hue"
        n_hue = df[hue].unique().size
        current_handles, current_labels = plt.gca().get_legend_handles_labels()
        current_handles = current_handles[:n_hue]
        current_labels = current_labels[:n_hue]
        plt.legend(current_handles,
                   current_labels,
                   bbox_to_anchor=(0.75, 1.),
                   loc='upper right')
        # We need this because the 2nd call to legend() erases the first #
        g.add_artist(conv_leg)

    g.set(xlabel=None)
    plt.ylabel("Mean absolute error \n of priors")
    plt.subplot(212)
    g = sns.lineplot(data=df,
                     x='epoch',
                     y='var_pseudo_neg',
                     hue=hue,
                     legend=False)
    plt.axhline(y=thr, color='r', linestyle='--')
    plt.axvline(x=min_epc, color='g', linestyle='--')
    g = sns.scatterplot(data=df[df['convergence'] != ''],
                        x='epoch',
                        y='var_pseudo_neg',
                        hue=hue,
                        legend=False,
                        markers=scrtmarkers,
                        s=100,
                        style='convergence')
    plt.ylabel("variance of\n pseudo negatives")

    return plt.gcf()
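A hedged usage sketch for `do_plot`; the frame below is hypothetical, matching the columns the function reads (`epoch`, `err_prior`, `var_pseudo_neg`, `convergence`, and the hue column `dset`), and it assumes pandas, seaborn, and matplotlib are imported as in the surrounding module.

import pandas as pd

demo = pd.DataFrame({
    "epoch": [1, 2, 3, 4],
    "err_prior": [0.30, 0.22, 0.18, 0.17],
    "var_pseudo_neg": [0.050, 0.020, 0.008, 0.007],
    "convergence": ["", "", "", "optima"],
    "dset": ["toy"] * 4,
})
fig = do_plot(demo, thr=0.01, min_epc=2, title="toy run")
fig.savefig("do_plot_demo.png")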
Example No. 15
sns.barplot(data=four_most_popular, y='item_name', x='revenue')
plt.ylabel('')
plt.xticks()

# 5.) Load the sleepstudy data and read its documentation.
# Use seaborn to create a line chart of all the individual subjects' reaction times and a more prominent line showing the average change in reaction time.

sleep = data('sleepstudy')
data('sleepstudy', show_doc=True)
sleep.info()
sleep.tail(20)
sleep.describe()

sns.set_style('darkgrid')
sns.set_context(font_scale=1,
                rc={
                    "grid.linewidth": 1,
                    "axes.linewidth": 1,
                    "ytick.major.width": 1,
                    "xtick.major.width": 1,
                    "lines.linewidth": 1
                })
palette = sns.color_palette("deep", 18)
sns.lineplot(x='Days',
             y='Reaction',
             hue='Subject',
             data=sleep,
             palette=palette)
sns.set_context(rc={"lines.linewidth": 4})
sns.lineplot(x='Days', y='Reaction', data=sleep, ci=None)
Example No. 16
            A = 1/(12 * K * (K * N))
            B = (k + N - 1/2)/(K*(K+N))
            func = lambda tau : -1/tau**2 + 2*A*tau + B
            taus = []
            for j in [10,500,100]:
                tau_initial_guess = 1
                tau_solution = fsolve(func, tau_initial_guess)
                taus.append(tau_solution)
            tau_solution = np.mean(taus)
            points.append({"k":k,"alpha":tau_solution,"N":N,"K":K})
pfx = pd.DataFrame(points)
pfx['N'] = pfx.N.astype(str)
pfx.to_csv("simulation_data.tsv",sep="\t")
plt.savefig("simulation.pdf",dpi=300)
pfx = pd.read_csv("simulation_data.tsv",sep="\t")
sns.lineplot(pfx.K,pfx.alpha,  label = "Simulated data")

def fit_func(x, a, b):
    return a*np.power(x,2/3) + b

params = curve_fit(fit_func, pfx.K, pfx.alpha)
a, b = params[0]

X_space = pfx.K
Y_space = a * np.power(X_space,2/3) + b

plt.plot(X_space,Y_space, label = r"$"+str(np.round(a,2))+r" \cdot K^{\frac{2}{3}} "+str(np.round(b,2))+r"$")
plt.legend()
plt.ylabel(r"$\gamma$")
#plt.title(r"Fitting $\gamma$ estimate to the space of 250,000 networks")
plt.savefig("simulation_final.pdf",dpi=300)
Example No. 17
def plot_linear(k,X,y,I_pca,I_spearman,I_mRMR,I_hierarchy,I_sklearn,I_permutation_oob,I_dropcol_oob):
    df_linear = pd.DataFrame({'k':np.arange(1,k+1)
                       , 'mae_pca':mae_I_linear(X,y,sort_I(I_pca),k)
                       ,'mae_spearman':mae_I_linear(X,y,sort_I(I_spearman),k)
                       ,'mae_mRMR':mae_I(X,y,sort_I(I_mRMR),k)
                       ,'mae_hierarchy':mae_I_linear(X,y,sort_I(I_hierarchy),k)
                       ,'mae_sklearn':mae_I_linear(X,y,sort_I(I_sklearn),k)
                       ,'mae_permutation_oob':mae_I_linear(X,y,sort_I(I_permutation_oob),k)
                       ,'mae_dropcol_oob':mae_I_linear(X,y,sort_I(I_dropcol_oob),k)
                      })
    sns.lineplot(df_linear['k'],df_linear['mae_pca'],marker='o', label='pca')
    sns.lineplot(df_linear['k'],df_linear['mae_spearman'],marker='p', label='spearman')
    sns.lineplot(df_linear['k'],df_linear['mae_mRMR'],marker='1', label='mRMR')
    sns.lineplot(df_linear['k'],df_linear['mae_hierarchy'],marker='s', label='hierarchy')
    sns.lineplot(df_linear['k'],df_linear['mae_sklearn'],marker='d', label='sklearn')
    sns.lineplot(df_linear['k'],df_linear['mae_permutation_oob'],marker='*', label='permutation')
    sns.lineplot(df_linear['k'],df_linear['mae_dropcol_oob'],marker=11, label='dropcol')
    plt.ylabel('20% 5fold CV MAE ($)')
    plt.title('linear model')
Example No. 18
                           sep="\t",
                           names=["Ref", "Pos", "depth"])

sns.lmplot(x="POS", y="ALT_FREQ", data=rep_b, fit_reg=False)

fig, ax1 = plt.subplots(figsize=(15, 5))
color = "tab:green"
#isnv plot creation
ax1.set_ylabel('iSNVs', color="blue", fontsize=20)
ax1.tick_params(axis='y', labelcolor="blue")
ax1 = sns.regplot(x="POS", y="ALT_FREQ", data=rep_b, ax=ax1, palette='summer')
#specify we want to share same x axis
ax2 = ax1.twinx()
color = 'tab:red'
# lineplot creation for the depth
sns.lineplot(x='Pos', y='depth', data=japan1_depth, ax=ax2)
ax2.set_ylabel('Depth', color="red")
ax2.tick_params(axis='y', labelcolor="red")
plt.show()

fig, ax1 = plt.subplots()

ax1.set_xlabel('Position')
ax1.set_ylabel('iSNVs', color="blue")
ax1.tick_params(axis='y', labelcolor="blue")
rep_a.plot(x="POS",
           y="ALT_FREQ",
           label="1_peru",
           ax=ax1,
           kind="scatter",
           color="blue",
Example No. 19
# Sets figure parameters
plt.figure(figsize=(6,5))
plt.rcParams['axes.titlesize'] = 13
plt.rcParams['axes.labelsize'] = 13
plt.rcParams['xtick.labelsize'] = 13
plt.rcParams['ytick.labelsize'] = 13
plt.rcParams['legend.fontsize'] = 12
plt.xlabel('Number of tasks (T)', fontsize=13)
plt.ylabel('Makespan (a.u.)', fontsize=13)
plt.xticks(range(1000,10001,1000))
plt.xticks(rotation=15)
plt.ylim(0, 12000)

sns.lineplot(data=recursive_results[recursive_results.Resources == 10],
             x='Tasks',
             y='Makespan',
             hue='Scheduler',
             linewidth=2,
             hue_order=order)

plt.savefig("s1-recursive-10.pdf", bbox_inches='tight')


# In[ ]:


# Sets figure parameters
plt.figure(figsize=(6,5))
plt.rcParams['axes.titlesize'] = 13
plt.rcParams['axes.labelsize'] = 13
plt.rcParams['xtick.labelsize'] = 13
plt.rcParams['ytick.labelsize'] = 13
Example No. 20
            fn, columns=["year_season", "policy_snapshot_url", "match_str"])

    cache[identifier] = results_df

    return results_df


def plot_term(df,
              term,
              regex=False,
              case=False,
              min_year=DEFAULT_MIN_YEAR,
              save_figure=True):
    plt.figure(figsize=(10, 5))
    percentages = search_term(df, term, regex, case, min_year)
    fig = sns.lineplot(x="interval", y="percentage", data=percentages)
    fig.set_xticklabels(fig.get_xticklabels(), rotation=45, fontsize='small')
    if not final:
        title = "Query: %s\n(Min-year: %s, Regex: %s, Case sensitive: %s)" % (
            term, min_year, regex, case)
        fig.set_title(title)

    plt.ylim(ymin=0)

    if save_figure:
        s_fig = fig.get_figure()
        s_fig.savefig("figures/%s_%s_%s_%s.png" %
                      (term, min_year, regex, case),
                      bbox_inches='tight')
    return fig
Example No. 21
    all_recalls[model] = mean_recall * 100
    all_maps[model] = np.mean(maps) * 100

    if APPROXIMATE:
        print(all_recalls[model])
        print(len(all_recalls[model]))
        if len(all_precisions[model]) > 1000:
            last_precision = all_precisions[model][-1]
            last_recall = all_recalls[model][-1]
            all_precisions[model] = all_precisions[model][0::50]
            all_recalls[model] = all_recalls[model][0::50]
            all_precisions[model] = np.append(all_precisions[model], last_precision)
            all_recalls[model] = np.append(all_recalls[model], last_recall)
        print(all_recalls[model])
        print(len(all_recalls[model]))
    sns.lineplot(all_precisions[model], all_recalls[model], label=str(model_print_name), linewidth=3)

plt.xlim([0.0, 100])
plt.ylim([0.0, 105])
plt.title("WHEN: Precision-recall plot")
plt.legend(loc="lower left")
sns.despine()
plt.savefig("./figures/roc_legend.png")

plt.xlim([0.0, 100])
plt.ylim([0.0, 105])
plt.title("WHEN: Precision-recall plot")
plt.legend(loc="lower left")
ax.get_legend().remove()
sns.despine()
plt.savefig("./figures/roc_when.png")
Example No. 22
    def plot(self, df_in, output_prefix, output_dir, dpi, force, show,
             verbose):
        df = df_in

        # Set a filename if needed.
        if self.filename is None:
            self.filename = f"{self.plot_type}--{self.xkey}--{self.ykey}--{self.huekey}--{self.stylekey}"

        # Use some seaborn defaults for fontsize etc.
        sns.set_context(self.sns_context, rc={"lines.linewidth": 2.5})

        # Set the general style.
        sns.set_style(self.sns_style)

        # Filter the data using pandas queries if required.
        if self.df_query is not None and len(self.df_query):
            df = df.query(self.df_query)

        # If the df is empty, skip.
        if df.shape[0] == 0:
            print(f"Skipping plot {self.filename} - 0 rows of data")
            return False

        # Get the number of palette values required.
        huecount = len(
            df[self.huekey].unique()) if self.huekey is not None else 1

        # Set palette.
        palette = sns.color_palette(self.sns_palette, huecount)
        sns.set_palette(palette)

        # create a matplotlib figure and axis, for a single plot.
        # Use constrained layout for better legend placement.
        fig, ax = plt.subplots(constrained_layout=True)

        # Set the size of the figure in inches
        fig.set_size_inches(FIGSIZE_INCHES[0], FIGSIZE_INCHES[1])

        # Generate labels / titles etc.
        xlabel = f"{pretty_csv_key(self.xkey)}"
        ylabel = f"{pretty_csv_key(self.ykey)}"
        huelabel = f"{pretty_csv_key(self.huekey)}"
        stylelabel = f"{pretty_csv_key(self.stylekey)}"
        hs_label = f"{huelabel} x {stylelabel}" if huelabel != stylelabel else f"{huelabel}"
        figtitle = f"{ylabel} vs {xlabel} (hs_label)"

        # @todo - validate keys.

        # Decide if using internal legend.
        external_legend = self.legend_outside
        filled_markers = ('o', 'v', '^', '<', '>', '8', 's', 'p', '*', 'h',
                          'H', 'D', 'd', 'P', 'X')
        g = None
        if self.plot_type == "lineplot":
            # plot the data @todo - lineplot vs scatter?
            g = sns.lineplot(
                data=df,
                x=self.xkey,
                y=self.ykey,
                hue=self.huekey,
                style=self.stylekey,
                markers=True,
                dashes=True,
                ax=ax,
                # size=6,
                legend=self.sns_legend,
                palette=palette,
            )
        elif self.plot_type == "scatterplot":
            g = sns.scatterplot(
                data=df,
                x=self.xkey,
                y=self.ykey,
                hue=self.huekey,
                style=self.stylekey,
                markers=True,
                ax=ax,
                # size=6,
                legend=self.sns_legend,
                palette=palette,
            )
        else:
            raise Exception(f"Bad plot_type {self.plot_type}")

        # Set a title
        # @disabled for now.
        # if len(figtitle):
        #     plt.title(figtitle)

        # adjust x axis if required.
        ax.set(xlabel=xlabel)
        if self.logx:
            ax.set(xscale="log")
        if self.minx is not None:
            ax.set_xlim(left=self.minx)
        if self.maxx is not None:
            ax.set_xlim(right=self.maxx)

        # adjust y axis if required.
        ax.set(ylabel=ylabel)
        if self.logy:
            ax.set(yscale="log")
        if self.miny is not None:
            ax.set_ylim(bottom=self.miny)
        if self.maxy is not None:
            ax.set_ylim(top=self.maxy)

        # Disable scientific notation on axes
        ax.ticklabel_format(useOffset=False, style='plain')

        # If there is reason to have a legend, do some extra processing.
        if ax.get_legend() is not None:
            legend = ax.get_legend()
            loc = None
            bbox_to_anchor = None
            if external_legend:
                # Set legend placement if not internal.
                loc = "upper left"
                # @todo - y offset should be LEGEND_BORDER_PAD transformed from font units to bbox.
                bbox_to_anchor = (1, 1 - self.legend_y_offset)

            # Get the handles and labels for the legend
            handles, labels = ax.get_legend_handles_labels()

            # Iterate the labels in the legend, looking for the huekey or stylekey as values
            # If either were found, replace with the pretty version
            found_count = 0
            for i, label in enumerate(labels):
                if label == self.huekey:
                    labels[i] = huelabel
                    found_count += 1
                elif label == self.stylekey:
                    labels[i] = stylelabel
                    found_count += 1

            # If neither were found, set a legend title.
            if found_count == 0:
                # add an invisible patch with the appropriate label, like how seaborn does if multiple values are provided.
                handles.insert(
                    0,
                    mpatches.Rectangle((0, 0),
                                       1,
                                       1,
                                       fill=False,
                                       edgecolor='none',
                                       visible=False,
                                       label=hs_label))
                labels.insert(0, hs_label)
                pass
            ax.legend(handles=handles,
                      labels=labels,
                      loc=loc,
                      bbox_to_anchor=bbox_to_anchor,
                      borderaxespad=LEGEND_BORDER_PAD)

        # if an output directory is provided, save the figure to disk.
        if output_dir is not None:
            output_dir = pathlib.Path(output_dir)
            # Prefix the filename with the experiment prefix.
            output_filename = f"{output_prefix}--{self.filename}"

            # Get the path for output
            output_filepath = output_dir / output_filename
            # If the file does not exist, or force is true, write the output file; otherwise error.
            if not output_filepath.exists() or force:
                try:
                    if verbose:
                        print(f"writing figure to {output_filepath}")
                    fig.savefig(output_filepath, dpi=dpi, bbox_inches='tight')
                except Exception as e:
                    print(
                        f"Error: could not write to {output_filepath} with exception {e}"
                    )
                    return False
            else:
                print(
                    f"Error: {output_filepath} already exists. Specify a different `-o/--output-dir` or use `-f/--force`"
                )
                return False

        # If not outputting, or if the show flag was set, show the plot.
        if show:  # or output_dir is None:
            plt.show()

        return True
Example No. 23
from statsmodels.sandbox.predict_functional import predict_functional

# In[104]:

values = {"hist": 0, "tumorsize": 50, "accinsitu": 0, "lymphinv": 0}

# In[105]:

pr, cb, fv = predict_functional(result,
                                "age",
                                values=values,
                                ci_method="simultaneous")

# In[106]:

ax = sns.lineplot(fv, pr, lw=4)
ax.fill_between(fv, cb[:, 0], cb[:, 1], color='grey', alpha=0.4)
ax.set_xlabel("age")
ax.set_ylabel("Re-excision")

ax.set_title('Fitted Model: Log-odd probability of Age by Re-excision')

# This plot of fitted log-odds visualizes the effect of age on re-excision for
# hist=0, tumorsize=50, accinsitu=0 and lymphinv=0 by the fitted GLM model
# A slight negative correlation of age and RE is visible in this plot
# for the specific variables described

# In[ ]:

# In[100]:
Example No. 24
             bins=range(
                 int(company.Low.min()) - 1,
                 int(company.Low.max()) + 1),
             normed=True,
             rwidth=1)
    plt.xticks(range(int(company.Low.min()) - 1, int(company.Low.max()) + 1))
    plt.xlabel("Lowest Prize for each day")
    plt.ylabel("Probability")
    plt.title("Company:  " + name.split(".")[0], fontsize=20)
plt.show()

# In[18]:

for name, i, company in zip(companies_9, range(9), data):
    plt.figure(figsize=(15, 5))
    sns.lineplot(data=data[i], x='Date', y="Open", label="Opening Value")
    sns.lineplot(data=data[i], x='Date', y="Close", label="Closing Value")
    sns.lineplot(data=data[i],
                 x='Date',
                 y="High",
                 label="Highest Value Per Day")
    sns.lineplot(data=data[i], x='Date', y="Low", label="Lowest Value Per Day")
    plt.legend()
    #plt.xticks([])
    plt.xlabel("Time")
    plt.ylabel("Stock Prices")
    plt.title("Compnay:  " + name.split(".")[0])
    plt.show()

# In[23]:
Example No. 25
    #Print dataframe
    print("")
    print("")
    print("=" * 72)
    print(" " * 3,
          "Average time taken to run each sorting algorithm in milliseconds")
    print("=" * 72)
    print(df)
    print("=" * 72)

    totalEndTime = time.time()

    # Print time taken to execute program
    totalTimeElapsed = totalEndTime - totalStartTime
    print("")
    print("Program took ", round(totalTimeElapsed, 3), " seconds to run")

    # Testing functions

    #Save data in csv to plot graphs in a separate program quicker
    #df.to_csv('sortingDataframe.csv')

    # Plot data
    plot = sns.lineplot(data=df, markers=True,
                        dashes=False)  # plot using seaborn
    plot.set(xlabel='Array Size',
             ylabel='Time taken (milliseconds)')  # label axis
    #plt.yscale("log") # Uncomment to graph log scale results for larger number of inputs

    plt.show()  # Show plot
Example No. 26
 plt.style.use("ggplot")
 # plt.rcParams.update({'legend.fontsize': 14})
 p = sns.color_palette()
 sns.set_palette([p[0], p[1]])
 f, axes = plt.subplots(1, 1, figsize=(width, height))
 # sns.lineplot(x='samples', y='success_rate', hue='algo', ax=axes[0], data=sr_timesteps)
 # axes[0].set_xlabel('samples')
 # axes[0].set_ylabel('success_rate')
 # axes[0].get_legend().remove()
 # sns.lineplot(x='samples', y='eval', hue='algo', ax=axes[1], data=eval_timesteps)
 # axes[1].set_xlabel('samples')
 # axes[1].set_ylabel('')
 # axes[1].get_legend().remove()
 sns.lineplot(x='iterations',
              y='len_mean',
              hue='algo',
              ax=axes,
              data=len_mean_iteration)
 axes.set_xlabel('iterations')
 axes.set_ylabel('episode length')
 axes.get_legend().remove()
 handles, labels = axes.get_legend_handles_labels()
 # if mode == 'train':
 #     sns.lineplot(x='samples', y='success_rate', hue='algo', data=sr_timesteps)
 #     axes.set_xlabel('samples')
 # elif mode == 'hard':
 #     sns.lineplot(x='samples', y='eval', hue='algo', data=eval_timesteps)
 #     axes.set_xlabel('samples')
 # elif mode == 'iteration':
 #     sns.lineplot(x='iterations', y='eval', hue='algo', ax=axes, data=eval_iteration)
 #     axes.set_xlabel('iterations')
Example No. 27
    "bleu": [0, 0],
    "data_ord": [0, 0],
    "data": ["nejm.0", "nejm.0"],
    "direction": ["zh2en", "en2zh"],
    "train": ["de novo", "de novo"]
})
bleu = pd.concat([zeros, bleu])
ord2num = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 5.46}
bleu["x"] = bleu["data_ord"].apply(lambda x: ord2num[x])

plt.ion()
fig, ax = plt.subplots(1, 1)
g = sns.lineplot(x="x",
                 y="bleu",
                 hue="direction",
                 data=bleu,
                 legend="brief",
                 style="train",
                 markers=["o", "o"],
                 dashes=[(2, 1), ""])
fig.set_size_inches(5, 4)
fig.tight_layout()
g.legend_.texts[0].set_position((-40, 0))
g.legend_.texts[0].set_text("Direction")
g.legend_.texts[3].set_position((-40, 0))
g.legend_.texts[3].set_text("Training")
ax.set_xlabel("In-Domain Sentence Pairs")
ax.set_ylabel("1-ref BLEU")
ax.set_xticks([0, 1, 2, 3, 4, 5, 5.46])
ax.set_xticklabels(["0", "4000", "8000", "16000", "32000", "64000", ""])
ax.legend()
plt.savefig(f"{out_dir}/bleu.pdf")
                 kde_kws={'linewidth': 3},
                 label=race)

# Plot formatting
plt.legend(prop={'size': 16}, title='Race')
plt.title('Victim Race and Median Income (2017 - 2018)')
plt.xlabel('Median Income')
plt.ylabel('Density')

# line chart for victim race and income

#sns.lineplot(x="timepoint", y="signal", data=fmri)

sns.lineplot(x='Victim Descent',
             y='Median Income',
             data=la,
             markers=True,
             dashes=False)

#plot formatting
#plt.legend(prop={'size': 16}, title = 'Race')
plt.title('Victim Descent and Median Income (2017 - 2018)')

# line chart for victim race and income

#sns.lineplot(x="timepoint", y="signal", data=fmri)

sns.lineplot(x='Victim Age', y='Median Income', data=la)

#plot formatting
#plt.legend(prop={'size': 16}, title = 'Race')
Example No. 29
"""
Timeseries plots with error bands
=================================

_thumb: .5, .45

"""
import seaborn as sns
sns.set(style="darkgrid")

# Load an example dataset with long-form data
fmri = sns.load_dataset("fmri")

# Plot the responses for different events and regions
sns.lineplot(x="timepoint", y="signal",
             hue="region", style="event",
             data=fmri)
df_acc = pd.DataFrame(index=ary_epochs)

for ind in range(len(lst_evnt_trn)):

    # add data to pandas loss data frame
    df_loss['trn_loss' + lst_names[ind]] = lst_trn_lss[ind]
    df_loss['val_loss' + lst_names[ind]] = lst_val_lss[ind]
    # add data to pandas loss data frame
    df_acc['trn_acc' + lst_names[ind]] = lst_trn_acc[ind]
    df_acc['val_acc' + lst_names[ind]] = lst_val_acc[ind]

# plot losses
fig, ax = plt.subplots()
fig.set_size_inches(17.5, 12.5)
sns.lineplot(data=df_loss,
             palette=lst_colors,
             dashes=lst_dashes,
             linewidth=2.5)
plt.xlabel("Number of Epochs")
plt.ylabel("Loss")
fig.savefig(
    "/media/sf_D_DRIVE/Unet/presentation/results/plots/loss_model_weighted.svg"
)
fig.savefig(
    "/media/sf_D_DRIVE/Unet/presentation/results/plots/loss_model_weighted.png"
)

# plot accuracies
fig, ax = plt.subplots()
fig.set_size_inches(17.5, 12.5)
sns.lineplot(data=df_acc, palette=lst_colors, dashes=lst_dashes, linewidth=2.5)
plt.xlabel("Number of Epochs")
Example No. 31
    def plot(self, data=None, **fargs):
        if data is None:
            data = self.df
        data = data.melt("timeStamp", var_name="cols", value_name="vals")
        sns.lineplot(data=data, **fargs)
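A hedged usage sketch for the melt-and-plot method above; `plotter` stands in for an instance of the surrounding class, and the wide frame with its column names is hypothetical.

import pandas as pd

# Hypothetical wide frame: a timeStamp column plus one column per series.
wide = pd.DataFrame({
    "timeStamp": [0, 30, 60, 90],
    "latency_ms": [120, 95, 88, 91],
    "error_rate": [0.4, 0.3, 0.2, 0.2],
})
# The keyword arguments are forwarded to sns.lineplot after the melt.
plotter.plot(data=wide, x="timeStamp", y="vals", hue="cols")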
Example No. 32
        pass
    test_scorecard_array = np.asarray(test_scorecard)
    test_performance.append(test_scorecard_array.sum() /
                            test_scorecard_array.size)
    print("testing performance = ",
          test_scorecard_array.sum() / test_scorecard_array.size)
    pass

# plot performance
sns.set(rc={'figure.figsize': (15, 10)})
performance_x = np.vstack((train_performance, test_performance)).T
performance_x_df = pd.DataFrame(performance_x).reset_index()
df = performance_x_df.melt('index', var_name='train/test', value_name='vals')
df['train/test'] = df['train/test'].map({0: 'train', 1: 'test'})
performance_x_plot = sns.lineplot(x="index",
                                  y="vals",
                                  hue='train/test',
                                  data=df)
performance_x_plot.set(xlabel='EPOCHS', ylabel='PERFORMANCE')
plt.savefig('performance_x.png')

# confusion matrix
a_x = pd.Series(np.asarray(actual))
p_x = pd.Series(np.asarray(predicted))
confusion_matrix_x = pd.crosstab(a_x, p_x)
confusion_matrix_x

# for experiment 2...

# shuffle and sample quarter of the training data
np.random.shuffle(train_data)
quarter_train_data = train_data[:15000]
sns.get_dataset_names()

# ## lineplot

# In[61]:

fmri = sns.load_dataset("fmri")

# In[20]:

fmri.head()

# In[33]:

ax = sns.lineplot(x="timepoint", y="signal", err_style="band", data=fmri)

# In[28]:

ax = sns.lineplot(x="timepoint", y="signal", err_style="bars", data=fmri)

# In[63]:

ax = sns.lineplot(x="timepoint", y="signal", ci=95, color="m", data=fmri)
ax = sns.lineplot(x="timepoint", y="signal", ci=68, color="b", data=fmri)

# In[35]:

ax = sns.lineplot(x="timepoint", y="signal", ci='sd', color="m", data=fmri)

# In[37]:
Example No. 34
import seaborn as sns

data = pd.read_csv("Processed-text.csv",
                   low_memory=False,
                   index_col=[2],
                   parse_dates=[2]).sort_index()
data.drop(columns=['Unnamed: 0', 'body'], inplace=True, axis=1)

# Sales distribution based on comments
monthly_order_count = data.resample('M').count()
#orders_monthly.dropna(how='any',inplace=True)
monthly_order_count = pd.DataFrame(monthly_order_count)
#plt.plot_date(x=orders_monthly.index,y=orders_monthly['rating'],fmt='o')
monthly_order_count.rename(columns={'rating': 'Sales'}, inplace=True)
ax = sns.lineplot(x=monthly_order_count.index,
                  y=monthly_order_count['Sales'],
                  data=monthly_order_count,
                  markers=True)

#Overall sentiment over time period
orders_monthly = data.resample('M').median()
orders_monthly = pd.DataFrame(orders_monthly)
ax = sns.lineplot(x=orders_monthly.index,
                  y=orders_monthly['rating'],
                  data=orders_monthly,
                  markers=True)

import plotly.graph_objs as go
import plotly.offline as py

actual_chart = go.Scatter(x=orders_monthly.index,
                          y=orders_monthly['Sales'],
Example No. 35
df.loc[:, 'dh'] = df.loc[:, 'h'].diff() / pd.Timedelta(dt).total_seconds()

step = 1
for t in tides_rep_with_slr.index[1:]:
    t_min_1 = t - pd.Timedelta(dt)
    df.loc[t, 'z'] = calc_z(df.at[t_min_1, 'z'], df.at[t_min_1, 'dz'], 0, 0)
    df.loc[t, 'C0'] = calc_c0(
        df.at[t, 'h'], df.at[t, 'dh'], df.at[t, 'z'], A, SSC)
    df.loc[t, 'C'] = calc_c(df.at[t, 'C0'], df.at[t, 'h'], df.at[t_min_1, 'h'],
                            df.at[t, 'dh'], df.at[t_min_1, 'C'], df.at[t, 'z'], ws, dt_sec)
    df.loc[t, 'dz'] = calc_dz(df.at[t, 'C'], ws, rho, dt_sec)
    print('Completed step {0} of {1}.'.format(step, len(tides_rep_with_slr)))
    step = step + 1

# %%

f, ax = plt.subplots(figsize=(5,6))
sns.set_style("whitegrid")

plt_data = df.shift(freq=pd.Timedelta(days=1326))
plt_data = plt_data[::10]
sns.lineplot(data=plt_data.h, alpha = 0.25)
sns.lineplot(data=plt_data.z, color = 'black')
plt.title("Tidal Heights and Land Surface")
plt.xlabel("Year")
plt.ylabel("Height above Mean Water Level (m)")
plt.xlim(start + pd.Timedelta(days=1326), rep_end + pd.Timedelta(days=1326))
plt.show()

#%%
#tides_rep.tail()