Exemplo n.º 1
0
def evaluate_distribution_matrix(dis_matrix: sp.spmatrix,
                                 show: bool = True,
                                 tell: bool = True,
                                 save_path: str = None,
                                 row_name: str = "column",
                                 column_name: str = "row"):
    """
    Compute per-vector statistics of a sparse distribution matrix along both axes.

    The matrix is processed twice: first column by column, then row by row.
    For every vector the number of non-zero and zero entries, the fraction of
    zeros, minimum, maximum, mean, median and entropy are collected. Each
    collection is summarised with ``stats_of_list`` and the summaries can be
    printed, written to CSV and drawn as boxplots.

    :param dis_matrix: distribution matrix to be evaluated.
    :param show: whether to show boxplots
    :param tell: whether to print statistics
    :param save_path: path prefix of the files to save (``_<name>.csv`` and
        ``_<name>.png`` are appended), default is None, meaning no saving
    :param row_name: name of rows for printing
    :param column_name: name of columns for printing
    :return: tuple with one entry per pass (columns first, then rows). Each
        entry is a list holding the ``stats_of_list`` result for every
        statistic, in the order of ``stat_names``, followed by the number of
        empty (all-zero) vectors.
    """
    # NOTE(review): the defaults of row_name / column_name look swapped
    # ("column" for rows and vice versa) -- confirm this is intended.
    sb.set_theme(style="whitegrid")
    return_stats_combined = ()
    stat_names = [
        "Non-Zero", "Zero", "Zeros%", "Minimums", "Maximums", "Averages",
        "Medians", "Entropies"
    ]
    # ab == 0: iterate over columns; ab == 1: iterate over rows
    for ab in range(2):
        return_stats = []
        non_zeros, num_zeros, per_zeros, empties, avgs, maxs, mins, medians, entropies = [], [], [], [], [], [], [], [], []
        # Fill out statistics for each row/column
        max_loop = 1 if ab == 0 else 0
        for i in tqdm(range(0, dis_matrix.shape[max_loop])):
            vec = dis_matrix.getcol(i) if ab == 0 else dis_matrix.getrow(i)
            # a column vector has shape (n, 1), a row vector (1, n), so index
            # `ab` selects the axis that carries the entries in both cases
            vec_len = vec.shape[ab]
            num_non_zero = len(vec.nonzero()[ab])
            non_zeros.append(num_non_zero)
            num_zeros.append(vec_len - num_non_zero)
            per_zeros.append((vec_len - num_non_zero) / vec_len)
            avgs.append(vec.mean())
            maxs.append(vec.max())
            mins.append(vec.min())
            # densify once and reuse for both the median and the entropy
            dense = vec.toarray()
            medians.append(np.median(dense))
            if num_non_zero == 0:
                empties.append(i)
            vec_array = dense.T[0] if ab == 0 else dense[0]
            # entropy is set to 1 if distribution is all zeros (which returns NaN).
            ent = entropy(vec_array, base=vec_len)
            entropies.append(1 if np.isnan(ent) else ent)
        # Print statistics
        print_name = f"{column_name}-{row_name}" if ab == 0 else f"{row_name}-{column_name}"
        if tell:
            print(print_name)
            print(f"{len(empties)} empty vectors")
        stats = {
            stat_names[0]: non_zeros,
            stat_names[1]: num_zeros,
            stat_names[2]: per_zeros,
            stat_names[3]: mins,
            stat_names[4]: maxs,
            stat_names[5]: avgs,
            stat_names[6]: medians,
            stat_names[7]: entropies
        }
        # Make stats ready for return
        for name, stat in stats.items():
            return_stats.append(stats_of_list(stat, name=name, tell=tell))
        return_stats.append(len(empties))
        return_stats_combined += (return_stats, )
        # Save stats
        if save_path is not None:
            with open(save_path + "_" + print_name + '.csv', "w+") as f:
                # zip stops at the last statistic, so the trailing
                # empty-vector count is not written to the file
                for name, stat in zip(stats.keys(), return_stats):
                    f.write(f"{name}, " + ", ".join(str(x)
                                                    for x in stat) + "\n")
        # Show stats
        if show or save_path is not None:
            # remove absolute number zero statistics, as they are not in range [0,1]
            stats.pop(stat_names[0])
            stats.pop(stat_names[1])
            df = pd.DataFrame(data=stats)
            box = df.boxplot()
            box.set_title(print_name)
            if save_path is not None:
                plt.savefig(save_path + "_" + print_name + ".png")
            if show:
                plt.show()
            else:
                # clear the figure so the next pass starts from a blank plot
                plt.clf()

    return return_stats_combined
def plot_toy(
    data,
    output_dir,
    annotate=False,
    site=None,
    zenith=None,
    obs_times=None,
    x_tick_labels="auto",
    y_tick_labels="auto",
    min_value=None,
    max_value=None,
    color_scheme="viridis",
    color_scale=None,
    as_percent=False,
    filetype="png",
    subtitle=None,
    filename_suffix="",
    show_only=False,
):
    """
    Draw a heatmap of GRB detection percentage by delay and exposure time.

    ``data`` is passed to ``analyze`` together with the site / zenith /
    obs_times filters; the resulting frame is pivoted (rows: exposure time,
    columns: delay, values: percent) and rendered with ``sns.heatmap``.

    :param data: input passed to ``analyze``; its index is used to count
        unique entries for the title when ``subtitle`` is given
    :param output_dir: directory the figure is written to
    :param annotate: write the value into every cell (``.0f`` format)
    :param site: site filter; ``None`` or "all" (any case) means both sites
    :param zenith: zenith filter; ``None`` or "all" (any case) means all zeniths
    :param obs_times: forwarded to ``analyze``
    :param x_tick_labels: forwarded to ``sns.heatmap`` as ``xticklabels``
    :param y_tick_labels: forwarded to ``sns.heatmap`` as ``yticklabels``
    :param min_value: lower colour limit
    :param max_value: upper colour limit
    :param color_scheme: colormap name
    :param color_scale: "log" for a logarithmic colour scale, otherwise passed
        to ``sns.heatmap`` as ``norm`` unchanged
    :param as_percent: multiply the "percent" column by 100 before plotting
    :param filetype: file extension of the saved figure
    :param subtitle: optional text appended to the title
    :param filename_suffix: text appended to the output file name
    :param show_only: show the figure instead of saving it
    :return: Nothing.
    """
    sns.set_theme()

    # str() keeps the "all" check safe for the default None (and for numbers);
    # calling .lower() on None directly raised AttributeError
    if str(zenith).lower() == "all":
        zenith = None
    if str(site).lower() == "all":
        site = None

    df = analyze(data, site=site, zenith=zenith, obs_times=obs_times)
    df.rename(columns={"obs_time": "exposure time"}, inplace=True)

    if as_percent:
        df["percent"] = df["percent"] * 100

    # keyword arguments: positional index/columns/values are rejected by
    # recent pandas releases
    pivot = df.pivot(index="exposure time", columns="delay",
                     values="percent").astype(float)

    _, ax = plt.subplots(figsize=(9, 9))

    cbar_kws = {
        "label": "Percentage of GRBs detected",
        "orientation": "vertical"
    }

    if color_scale == "log":
        from matplotlib.colors import LogNorm

        color_scale = LogNorm(vmin=min_value, vmax=max_value)

    # options shared by the annotated and the plain heatmap
    heatmap_kwargs = {
        "annot": bool(annotate),
        "ax": ax,
        "cmap": color_scheme,
        "vmin": min_value,
        "vmax": max_value,
        "xticklabels": x_tick_labels,
        "yticklabels": y_tick_labels,
        "cbar_kws": cbar_kws,
        "norm": color_scale,
    }
    if annotate:
        # annotated cells get integer labels and thin separators
        heatmap_kwargs.update(fmt=".0f", linewidths=0.5)

    heatmap = sns.heatmap(pivot, **heatmap_kwargs)

    heatmap.invert_yaxis()
    heatmap.set_facecolor("#1C1C1C")

    # from here on site / zenith are display strings used in title and file name
    if not site:
        site = "Both sites"
    else:
        site = f"CTA {site.capitalize()}"

    if not zenith:
        zenith = "all zeniths"
    else:
        zenith = f"z{zenith}"

    if subtitle:
        plt.title(
            f"GRB Detectability for {site}, {zenith}: {subtitle} (n={len(np.unique(data.index))})"
        )
    else:
        plt.title(f"GRB Detectability for {site}, {zenith}")

    fig = heatmap.get_figure()

    if not show_only:
        output_file = f"{output_dir}/GW_{site.replace(' ','_')}_{zenith.replace(' ','_')}{filename_suffix}.{filetype}"
        fig.savefig(output_file)
    else:
        plt.show()
Exemplo n.º 3
0
from matplotlib import pyplot as plt
import seaborn as sns

font_cs = {'fontname': 'Consolas'}


def entropy(x_, b=2):
    """
    Return ``-sum(x * log(x))`` over all entries of *x_*.

    Entries equal to zero contribute nothing (the usual ``0 * log 0 = 0``
    convention) instead of turning the whole result into NaN.

    :param x_: probability value(s), a scalar or an array-like
    :param b: ``2`` selects the base-2 logarithm; any other value selects the
        natural logarithm
    :return: the summed entropy as a scalar
    """
    probs = np.atleast_1d(np.asarray(x_, dtype=float))
    # drop exact zeros only; negative values are kept and still yield NaN
    probs = probs[probs != 0]
    log = np.log2 if b == 2 else np.log
    return -np.sum(probs * log(probs))


# Sample points 0.01, 0.02, ... strictly inside (0, 1)
x = np.arange(0.01, 1.00, 0.01)
# NOTE(review): entropy() reduces its input with np.sum, so h is a single
# number rather than one value per sample point -- confirm this is intended.
h = entropy(x)

# x is used as the index; presumably the scalar h is repeated on every row,
# which would make the plotted line flat -- verify.
df = pd.DataFrame(h, x, columns=['entropy(x)'])
# df = pd.DataFrame(h, x1, columns=['x*-logP(x)+(1-x)*(-logP(1-x))'])
print(df)

sns.set_theme(style='whitegrid')

# Line plot of the frame with Consolas axis labels and title
sns.lineplot(data=df)
plt.xlabel('x', **font_cs)
plt.ylabel('value', **font_cs)
plt.title('Entropy', **font_cs)

plt.show()

async def main():
    """
    Main function of the application.

    Loads and preprocesses the training dataset, trains (or loads) every
    model in a worker thread, prints test-set forecasts, then forecasts on
    data fetched from InfluxDB and plots the averaged forecast next to the
    most recent sensor values.

    :return: Nothing.
    """
    print_header()
    timer_main = Timer()

    config = default_config()

    # read and prepare dataset for training
    df_timeseries_complete = load_dataset("zurich_adapter", config)

    df_timeseries = chop_first_fringe(
        df_timeseries_complete)  # Chop first improper filled rows
    imputed_timeseries = impute_simple_imputer(df_timeseries)
    smooth_timeseries = moving_average(imputed_timeseries)
    smooth_timeseries.dropna(
        inplace=True
    )  # Make sure there really is no empty cell anymore, else drop row
    # Split training/testing data in 80%/20%
    df_train_val, df_test = temporal_train_test_split(smooth_timeseries,
                                                      test_size=.20)

    # Define all models at our disposal
    # NOTE(review): "expsmooting" is spelled differently from the
    # "expsmoothing" key used for the forecasts below -- confirm whether the
    # name is used for stored-model lookup before changing it.
    # NOTE(review): "lstm_seq" uses the same trainer as "lstm" -- confirm.
    models = [
        ModelHolder(name="arima", trainer=train_or_load_ARIMA, config=config),
        ModelHolder(name="autoarima",
                    trainer=train_or_load_autoARIMA,
                    config=config),
        ModelHolder(name="expsmooting",
                    trainer=train_or_load_expSmoothing,
                    config=config),
        ModelHolder(name="lstm", trainer=train_or_load_LSTM, config=config),
        ModelHolder(name="lstm_seq", trainer=train_or_load_LSTM, config=config)
    ]

    # Train the models concurrently, one worker thread per model
    trained_models = await gather(*[
        to_thread(train_model, model=model, data=df_train_val)
        for model in models
    ])
    # Stores if not existing. Does NOT OVERWRITE!!!
    for model in trained_models:
        model.model.store(model.config)

    # Test the generalization performance of our models
    forecast_test = [
        model.model.predict(x=df_test, fh=5) for model in trained_models
    ]

    print(forecast_test)

    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.PM10', 'Zch_Stampfenbachstrasse.PM10_Pred']])
    # plt.plot(forecast_test[0][['Zch_Stampfenbachstrasse.Humidity', 'Zch_Stampfenbachstrasse.Temperature']])
    # plt.show()

    # NOTE(review): these messages are logged although the live-forecast part
    # below still runs afterwards.
    logger.info(f"Script completed in {timer_main}.")
    logger.info("Terminating gracefully...")

    logger.info("start predicting new time")

    # Placeholder; replaced inside the `with` block below
    forecast_dict = {
        "arima": pd.Series(),
        "autoarima": pd.Series(),
        "expsmoothing": pd.Series(),
        "lstm": pd.Series(),
        "lstm_seq": pd.Series()
    }
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().rename(
            columns={
                "humidity": "Live.Humidity",
                "pm10": "Live.PM10",
                "temperature": "Live.Temperature"
            })
        imputed_data = impute_simple_imputer(data)  # Impute
        avg_data = moving_average(imputed_data)  # Average input
        logger.debug("Forecasting")
        forecast_list = [
            model.model.predict(x=avg_data, fh=5) for model in trained_models
        ]  # Make predictions

        logger.info(forecast_list)
        # NOTE(review): the "lstm" / "lstm_seq" entries read from items 0 and 1
        # (the ARIMA forecasts) instead of items 3 and 4 -- see the inline
        # remarks; confirm this workaround is still wanted.
        forecast_dict = {
            "arima":
            forecast_list[0],
            "autoarima":
            forecast_list[1],
            "expsmoothing":
            forecast_list[2],
            "lstm":
            forecast_list[0].iloc[:, forecast_list[0].columns.
                                  get_loc("Live.PM10_Pred")],  # was item 3
            "lstm_seq":
            forecast_list[1].iloc[:, forecast_list[1].columns.get_loc(
                "Live.PM10_Pred")]  # was item 4
        }

    # Average the forecasts row-wise and keep the first 50 rows
    forecast = pd.DataFrame(data=forecast_dict)
    logger.debug(forecast)
    forecast = forecast.mean(axis=1).head(n=50)
    forecast.name = "forecast"
    logger.info(f"Forcasting finished with forecast value\n {forecast}")

    # Second query with a different limit and drop list
    config["influx"]["limit"] = "150"
    config["influx"][
        "drops"] = '["pm1", "pm4.0", "pm2.5", "result", "table", "_time", "humidity", "temperature"]'
    with InfluxSensorData(config=config, name="influx") as client:
        # Load the data from the server
        data = client.get_data().tail(n=50)
        # re-index from 0 and keep only the first column for plotting
        data.index = range(len(data))
        data = data.iloc[:, 0]
        print(f"data {data}")

    sns.set_theme(style="darkgrid")
    sns.lineplot(data=[forecast, data])
def presentation_frequency_plot_figures(outcome):
    """
    Draw a horizontal count plot of EMS call records from ``df_q4``, split by shift.

    ``outcome`` selects what is plotted:

    * ``'Overall'``: all records, counted per fire station (this plot was
      not used in the presentation or report).
    * ``'Outcome'``: all records, counted per patient outcome.
    * ``'Top 4 Outcomes'``: only the four outcomes listed below, counted per
      patient outcome, on a smaller figure (created for the presentation to
      save space).
    * any other value: only the records whose ``PatientOutcome`` equals
      ``outcome``, counted per fire station.
    """
    # Defaults shared by the per-fire-station views
    x_label = 'Counts'
    y_label = 'Fire Station'
    y_sel = 'FireStation'
    fig_size = (20, 20)
    font_size = 24

    if outcome == 'Overall':
        title = outcome + ' Fire Station Outcomes Across Shift'
        frame = df_q4.copy(deep=True)
    elif outcome == 'Outcome':
        title = 'Patient Outcome Frequency'
        y_label = 'Patient Outcome'
        y_sel = 'PatientOutcome'
        # this view applies the theme before the figure is created
        sns.set_theme(style=seaborn_theme)
        frame = df_q4.copy(deep=True)
    elif outcome == 'Top 4 Outcomes':
        title = 'Patient Outcome Frequency'
        y_label = 'Patient Outcome'
        y_sel = 'PatientOutcome'
        top_outcomes = [
            'Treated & Transported', 'Patient Refusal  (AMA)',
            'No Treatment/Transport Required', 'Canceled (Prior to Arrival)'
        ]
        is_top = df_q4['PatientOutcome'].isin(top_outcomes)
        frame = df_q4[is_top].copy(deep=True)
        fig_size = (10, 5)
        font_size = 18
    else:
        title = outcome + ' Outcome Across Fire Station and Shift'
        matches = df_q4['PatientOutcome'] == outcome
        frame = df_q4[matches].copy(deep=True)

    # Plot: categories ordered by descending record count
    plt.subplots(figsize=fig_size)
    sns.set_theme(style=seaborn_theme)
    category_order = frame[y_sel].value_counts().index
    ax = sns.countplot(data=frame,
                       y=y_sel,
                       hue='Shift',
                       palette=palette_sel_distinct,
                       order=category_order,
                       hue_order=['A - Shift', 'B - Shift', 'C - Shift'])
    ax.set_title(title, fontsize=font_size)
    ax.set_xlabel(x_label, fontsize=font_size)
    ax.set_ylabel(y_label, fontsize=font_size)
    ax.tick_params(labelsize=font_size)
    ax.legend(fontsize=font_size, loc='lower right')
Exemplo n.º 6
0
def space_invaders(name_plot='r', ci='t', include_css=False, holder_melt=None):
    '''
    Point plot of one score per metric and model, with error bars taken from the 'ci' column.

    :param name_plot: key into ``holder_melt``; one of
        ['r', 'sd', 'iqr', 'range', 'no_norm']. 'r' plots the 'mean_corr'
        column, every other value plots the 'rmse_<name_plot>' column.
    :param ci: label used in the title only ('t' vs 'bs')
    :param include_css: keep the rows whose metric is 'CSS'
    :param holder_melt: mapping from ``name_plot`` to a table with at least
        the columns 'metric', 'model', 'ci' and the plotted column
    :return: the ``matplotlib.pyplot`` module, with the figure drawn
    '''
    assert holder_melt is not None
    tables = holder_melt.copy()
    sns.set_theme(context='poster',
                  style='darkgrid',
                  font='sans-serif',
                  color_codes=True)
    plt.rcParams["figure.figsize"] = (16, 12)

    name_y = 'mean_corr' if name_plot == 'r' else 'rmse_' + name_plot

    table = tables[name_plot]
    if include_css:
        errors = table.ci
        data = table
    else:
        without_css = table['metric'] != 'CSS'
        errors = table.loc[without_css, 'ci']
        data = table.loc[without_css, :]

    ax = sns.pointplot(x='metric',
                       y=name_y,
                       hue='model',
                       style='metric',
                       data=data,
                       dodge=0.6,
                       join=False,
                       ci=None,
                       scale=1,
                       palette=sns.color_palette('Paired', data.shape[0]))

    # Collect the x,y coordinates of every plotted point; the error values
    # must be in the same order as the points are visited here
    points = [
        point for collection in ax.collections
        for point in collection.get_offsets()
    ]
    x_coords = [x for x, _ in points]
    y_coords = [y for _, y in points]

    ax.errorbar(x_coords,
                y_coords,
                yerr=errors,
                fmt='none',
                c='black',
                elinewidth=4,
                markeredgewidth=4,
                zorder=-1,
                capsize=10)

    ax.set_xlabel('')
    ax.set_ylabel('')

    if name_plot == 'r':
        title = 'r ' + 'confidence calculated using ' + ci
    elif name_plot == 'no_norm':
        title = 'rmse not normed ' + 'confidence calculated using ' + ci
    else:
        title = ('rmse normed by ' + name_plot +
                 ' confidence calculated using ' + ci)
    plt.title(title)

    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.1)

    return plt
Exemplo n.º 7
0
# Tag the second frame, stack both frames and keep only rows with Step < 9
df_opp["Type"] = "Opp"
df = pd.concat([df_same, df_opp], ignore_index=True)
df = df[df["Step"] < 9]
# Turn the numeric step into a math-text label such as "$i$+3"
df["Step"] = r"$i$+" + df["Step"].astype(int).astype(str)
barWidth = 0.35
nodes = [0, 0.25, 0.5, 0.75, 1]
colors = ["#FDE725FF", "#440154FF", "#FDE725FF"]
# Regular red and green
# colors = ["#6BE585", "#DD3E54", "#6BE585"]
# degree_cmap = LinearSegmentedColormap.from_list("", list(zip(nodes, colors)))
# degree_cmap = LinearSegmentedColormap.from_list("", list(zip(nodes, colors)))
# Colormap built from the reversed viridis colours followed by the viridis
# colours, so the first and last entries are the same colour.
# NOTE(review): `nodes` and `colors` above are only referenced by the
# commented-out lines in this fragment -- confirm they are still needed.
degree_cmap = mpl.colors.ListedColormap(mpl.cm.get_cmap('viridis_r').colors + mpl.cm.get_cmap('viridis').colors)
# print(degree_cmap.colors)
# print(len(degree_cmap.colors))
# grid = plt.GridSpec(3,6, wspace=0.4, hspace=0.1)
sns.set_theme(style="white", context="paper")
# sns.set(fontsize=14)
#print(df.groupby(["Type","Step"])["Step"].count())
# Five stacked axes with height ratios 72:1:72:1:72; the two thin rows are
# presumably spacers or colour bars -- TODO confirm
f, axes = plt.subplots(5,1,figsize=(7, 9), gridspec_kw={"height_ratios":[72,1,72,1,72]})
# h = sns.histplot(df, x="Step", color="grey", hue="Type", discrete=True, multiple="dodge", shrink=.8, ax=axes[2])
# h = sns.histplot(df, x="Step", color="grey", hue="Type", discrete=True, hue_order=["Opp","Same"], multiple="dodge", shrink=.8, ax=axes[2])
# axes[2].get_legend().remove()
#axes[2].set_alpha(0.8)
# Lookup tables keyed 0, 1, 2: hatch pattern and fill setting per key
hatches = {0:"///", 1:"", 2:"|||"}
fill = {0:"#FFFFFF", 1:False, 2:False}
# Distinct colors 0-8
# (both lists hold ten entries; they differ only at index 8)
distinct_colors = ["#FFFFFF", "#773712", "#B3B3B3", "#EE7F31", "#FBE44D", "#B3B3B3", "#B3B3B3", "#D5C4AB", "#B3B3B3", "#B3B3B3"]
wheel_colors = ["#FFFFFF", "#773712", "#B3B3B3", "#EE7F31", "#FBE44D", "#B3B3B3", "#B3B3B3", "#D5C4AB", "#FFFFFF", "#B3B3B3"]
# for i in range(2):
#     for j in range(8):
#         color_index = (j*100 + 100) % 360
Exemplo n.º 8
0
                        help='position on the hyperparameter list')
    parser.add_argument('-p',
                        '--params',
                        help='path to a list of hyperparameters')
    parser.add_argument('-r',
                        '--random',
                        default=123,
                        type=int,
                        help='random seed')
    parser.add_argument('session', help='name of the session')
    parser.add_argument('-v', '--verbose', action='count', default=0)

    args = parser.parse_args()

    # set matplotlib backend to batch use
    sns.set_theme(context='paper', palette='tab10')
    mpl.use('agg')

    # create relevant directory structure
    img_dir = os.path.join(args.session, 'img')
    mod_dir = os.path.join(args.session, 'mod')
    log_dir = os.path.join(args.session, 'log')

    os.makedirs(img_dir, exist_ok=True)  # image directory
    os.makedirs(mod_dir, exist_ok=True)  # models directory
    os.makedirs(log_dir, exist_ok=True)  # log directory

    # create log file
    level = logging.ERROR
    if args.verbose == 1:
        level = logging.INFO
import csv
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(color_codes=True)
import numpy as np
#creates lists for the ratings and shot quality that will be used to graph
teamOffensiveRatings = []
teamOffShotQuality = []
teamDefensiveRatings = []
teamDefShotQuality = []

#opens CSV file for offense
with open('pbpstats 2020-2021 Team Data Offense.csv') as csv_file:
    csv_reader = csv.reader(csv_file, delimiter=',')
    line_count = 0

    for row in csv_reader:
        #skips first line
        if line_count == 0:
            pass
            line_count += 1

        else:
            #row 3 has total points data
            points = int(row[3])
            #row 2 has total possessions data
            possessions = int(row[2])
            #row 20 has shot quality data
            shotQuality = float(row[20])
            #calculates offensive rating
            offRating = (points / possessions) * 100
Exemplo n.º 10
0
    if provide_mean_lengths == True:
        return L_32, L_43, D_32, D_43, unreal_D43, unreal_D32
    else:
        return DSD


#### TRIAL USAGE #########

DSD = CLDtoDSDMethod1('Experiment 2020-11-27 10-34 Default.csv', 'Last Time')
DSDm2 = CLDtoDSDMethod2('Experiment 2020-11-27 10-34 Default.csv', 'Last Time')
L_32, L_43, D_32, D_43, unreal_D43, unreal_D32 = CLDtoDSDMethod2(
    'Experiment 2020-11-27 10-34 Default.csv',
    'Last Time',
    provide_mean_lengths=True)
CLD = ExperimentalCLD('Experiment 2020-11-27 10-34 Default.csv', 'Last Time')
sns.set_theme(context='paper', style='ticks', font_scale=2, palette='bright')
sns.set_style({'font.family': 'serif', 'font.serif': 'Times New Roman'})
plt.figure(figsize=(10, 10))
plt.semilogx(CLD.iloc[:, 0].values,
             CLD.iloc[:, 1].values,
             label='CLD',
             linewidth=2,
             color='black')
plt.semilogx(DSD['Diameter'].values,
             DSD['Counts'].values,
             label='Method 1',
             linewidth=2,
             color='red')
plt.semilogx(DSDm2['Diameter'].values,
             DSDm2['Counts'].values,
             label='Method 2',
# In[207]:

# Pivot the counts: one row per IncomeRange, one column per IsBorrowerHomeowner value
ct_counts = ct_counts.pivot(index='IncomeRange',
                            columns='IsBorrowerHomeowner',
                            values='count')

# In[208]:

fig, ax = plt.subplots(figsize=[14.70, 8.27])
sb.heatmap(ct_counts, annot=True, fmt='d', ax=ax)

# In[209]:

sb.set_theme(style="darkgrid")
fig, ax = plt.subplots(figsize=[14.70, 8.27])
sb.countplot(data=df, x='IsBorrowerHomeowner', hue='IncomeRange', ax=ax)

# #### A different method is used to observe the relationship between owning a home and income range; it likely shows that home owners are more common in the 100k-dollar income range.
#

# In[210]:

df['StatedMonthlyIncome'].head()

# In[211]:

# Regression plot; the fitted line represents the correlation.
fig, ax = plt.subplots(figsize=[14.70, 8.27])
sb.regplot(data=df,
import math
import os

import h5py
from Bio import SeqIO
from tqdm import tqdm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Global seaborn theme: white grid, bottom x ticks enabled, slightly smaller
# Verdana text
sns.set_theme(style='whitegrid',
              rc={"xtick.bottom": True},
              font_scale=0.9,
              font='Verdana')

# Default figure size and resolution for every figure created afterwards
plt.rcParams["figure.figsize"] = (8, 4)
plt.rcParams['figure.dpi'] = 300
# NOTE(review): 'normal' does not look like a font family name, and this
# plt.rc call replaces the font family set via sns.set_theme above --
# confirm which font is actually wanted.
font = {'family': 'normal', 'weight': 'bold', 'size': 10}

plt.rc('font', **font)
# Results table used for the plots below
df = pd.read_csv('../data/results/paper_tables.CSV')
remapping = {
    'Baseline': 'Majority',
    'LocTree2': 'LocTree2',
    'MultiLoc2': 'MultiLoc2',
    'SherLoc2': 'SherLoc2',
    'Yloc': 'Yloc',
    'CELLO': 'CELLO',
    'iLoc-Euk': 'iLoc-Euk',
    'WoLF PSORT': 'WolF PSORT',
Exemplo n.º 13
0
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import streamlit as st




sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})

def label(x, color, label):
    """
    Write *label* in bold black text on the current axes.

    The text is placed in axes coordinates at (0, 0.4), left-aligned and
    vertically centred. ``x`` and ``color`` are accepted but not used.
    """
    current_axes = plt.gca()
    text_style = {
        "fontweight": "bold",
        "color": 'black',
        "ha": "left",
        "va": "center",
        "transform": current_axes.transAxes,
    }
    current_axes.text(0, .4, label, **text_style)

# Load the survey data and index it by date
df = pd.read_csv('data/nsi.csv')
df = df.set_index("date")
# Column prefixes and their display labels, in matching order.
# NOTE(review): `vars` shadows the builtin of the same name; kept because
# later parts of the script may reference it.
vars = ["st", "wl", "ml", "gm"]
labels = ["Stress", "Workload", "Motivation", "Mood"]


mainvar = "Stress"
v = vars[labels.index(mainvar)]

# Keep the "<prefix>_<digit>" columns. The pattern is a raw string so that
# "\d" reaches the regex engine without an invalid-escape warning.
tdf = df.filter(regex=v + r'_\d')
tdf["date"] = tdf.index
# Wide -> long: one row per (date, score); the value column is renamed to the
# prefix and rows with missing values are dropped
tdf = pd.wide_to_long(tdf, stubnames=v+"_", i="date", j="score").reset_index().rename(columns={v+"_": v}).dropna()
# Repeat every row as many times as the value stored in column v
tdf = tdf.loc[tdf.index.repeat(tdf[v])].reset_index()
# Normalise dates from dd/mm/YYYY to YYYY-mm-dd strings, then sort newest first
tdf["date"] = pd.to_datetime(tdf["date"], format='%d/%m/%Y')
tdf["date"] = tdf["date"].dt.strftime('%Y-%m-%d')
tdf = tdf.sort_values(by='date', ascending=False)
Exemplo n.º 14
0
import argparse
import csv
from functools import partial
import gzip
from pathlib import Path
import sys

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from tqdm import tqdm

sns.set_theme(style='white', context='paper')


def extract_scores(scores_csv, score_col=1, title_line=True):
    if Path(scores_csv).suffix == '.gz':
        open_ = partial(gzip.open, mode='rt')
    else:
        open_ = open

    with open_(scores_csv) as fid:
        reader = csv.reader(fid)
        if title_line:
            next(reader)
        scores = []
        for row in tqdm(reader):
            try:
                score = float(row[score_col])
            except ValueError:
                continue
Exemplo n.º 15
0
"""
Smooth kernel density with marginal histograms
==============================================

_thumb: .48, .41
"""
import seaborn as sns

sns.set_theme(style="white")

# Example dataset fetched through seaborn's dataset loader
df = sns.load_dataset("penguins")

# Joint grid of body mass vs. bill depth with no gap between the joint and
# the marginal axes (space=0)
g = sns.JointGrid(data=df, x="body_mass_g", y="bill_depth_mm", space=0)
# Joint axes: filled KDE with 100 levels, clipped to the given x and y ranges
g.plot_joint(sns.kdeplot,
             fill=True,
             clip=((2200, 6800), (10, 25)),
             thresh=0,
             levels=100,
             cmap="rocket")
# Marginal axes: opaque 25-bin histograms in a dark colour
g.plot_marginals(sns.histplot, color="#03051A", alpha=1, bins=25)
Exemplo n.º 16
0
def Fig3_boxplot(start_yr, var_names, ylabels, ylabels_R, ranges, ranges_diff):
    """
    Draw a 2x2 grid of yearly boxplots comparing the GW and FD experiments.

    For every entry of ``var_names`` the files
    ``./txt/<var>_GW_rawdata_4_Python.txt`` and
    ``./txt/<var>_FD_rawdata_4_Python.txt`` are read with
    ``read_summer_heatwave`` and plotted as boxplots grouped by year, one
    panel per variable. The figure is saved to ``./plots/plot_boxplots.png``.

    :param start_yr: forwarded to ``read_summer_heatwave``
    :param var_names: variable names, at most four (one per panel)
    :param ylabels: y-axis label per variable
    :param ylabels_R: currently unused (was the label of a secondary
        difference axis that is no longer drawn)
    :param ranges: (low, high) y-axis limits per variable
    :param ranges_diff: currently unused (was the range of the secondary
        difference axis)
    :return: Nothing.
    """
    # set plots
    # NOTE(review): set_theme is called after the two set_style calls and
    # presumably resets the tick direction chosen there -- confirm the
    # intended order.
    sns.set_style("ticks")
    sns.set_style({"xtick.direction": "in", "ytick.direction": "in"})
    sns.set_theme(style="ticks", palette="pastel")

    fig, axs = plt.subplots(2, 2, figsize=(10, 7))

    # NOTE(review): these rcParams are set after the figure and axes were
    # created; confirm they take effect on this figure as intended.
    plt.rcParams['text.usetex'] = False
    plt.rcParams['font.family'] = "sans-serif"
    plt.rcParams['font.serif'] = "Helvetica"
    plt.rcParams['axes.linewidth'] = 1.5
    plt.rcParams['axes.labelsize'] = 12
    plt.rcParams['font.size'] = 12
    plt.rcParams['legend.fontsize'] = 12
    plt.rcParams['xtick.labelsize'] = 12
    plt.rcParams['ytick.labelsize'] = 12
    plt.rcParams["legend.markerscale"] = 3.0

    almost_black = '#262626'
    # change the tick colors also to the almost black
    plt.rcParams['ytick.color'] = almost_black
    plt.rcParams['xtick.color'] = almost_black

    # change the text colors also to the almost black
    plt.rcParams['text.color'] = almost_black

    # Change the default axis colors from black to a slightly lighter black
    plt.rcParams['axes.edgecolor'] = almost_black
    plt.rcParams['axes.labelcolor'] = almost_black

    # set the box type of sequence number
    props = dict(boxstyle="round", facecolor='white', alpha=0.0, ec='white')

    orders = ['(a)', '(b)', '(c)', '(d)']

    # named color "cornsilk" in ncl
    fill_color = (1., 0.972549, 0.862745)

    # one tick per year 2001-2019, labelling every second year
    xtickslocs = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
    ]
    xticklabels = [
        "2001", "", "2003", "", "2005", "", "2007", "", "2009", "", "2011",
        "", "2013", "", "2015", "", "2017", "", "2019"
    ]

    for i, var_name in enumerate(var_names):

        row = i // 2  # round
        col = i % 2  # mod
        ax = axs[row, col]

        # -------------------- boxplot ---------------------

        # read box values
        filename_GW = "./txt/" + var_name + "_GW_rawdata_4_Python.txt"
        filename_FD = "./txt/" + var_name + "_FD_rawdata_4_Python.txt"

        df_gw = read_summer_heatwave(filename_GW, start_yr)
        df_gw['experiment'] = "GW"
        df_fd = read_summer_heatwave(filename_FD, start_yr)
        df_fd['experiment'] = "FD"

        # make one dataframe
        df = pd.concat([df_gw, df_fd])
        print(df)

        # Plotting boxplot. `ax` must be passed explicitly: without it
        # seaborn draws on the current axes, so every variable ended up in
        # the same panel.
        sns.boxplot(x="year",
                    y="var",
                    data=df,
                    showfliers=False,
                    palette=["m", "g"],
                    hue="experiment",
                    whis=0,
                    ax=ax)
        ax.set_ylim(ranges[i])
        ax.set_ylabel(ylabels[i])

        # Adding shadings over two spans of the x axis
        ax.fill_between([0.5, 8.5],
                        ranges[i][0],
                        ranges[i][1],
                        facecolor=fill_color,
                        alpha=0.5)
        ax.fill_between([16.5, 19.5],
                        ranges[i][0],
                        ranges[i][1],
                        facecolor=fill_color,
                        alpha=0.5)

        # keep the legend on the first panel only
        if row == 0 and col == 0:
            ax.legend(numpoints=1, loc="best", frameon=False)
        else:
            ax.get_legend().remove()

        # only the bottom row shows the x axis
        if row == 1:
            ax.set(xticks=xtickslocs, xticklabels=xticklabels)
        else:
            ax.get_xaxis().set_visible(False)

        # panel letter in the top-left corner
        ax.text(0.05,
                0.95,
                orders[i],
                transform=ax.transAxes,
                fontsize=14,
                verticalalignment='top',
                bbox=props)

    fig.savefig("./plots/plot_boxplots.png",
                bbox_inches='tight',
                dpi=300,
                pad_inches=2)
import pandas as pd
from kneed import KneeLocator
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import seaborn as sns; sns.set_theme()
from sklearn.metrics import accuracy_score

# Load the spreadsheet into a DataFrame
data = pd.DataFrame(pd.read_excel("advancedkmeans.xlsx"))

# Copy the risk-priority column (spelled as in the spreadsheet header) to the
# shorter name "RPN" and drop the original column
data["RPN"] = data["Risk priortiy number"]
data = data.drop(axis=0, columns=["Risk priortiy number"])

# Feature matrix built from the S, O and D columns
x = data[["S","O","D"]].values

# Elbow search: fit KMeans for k = 1..14 and record the inertia of each fit
distance = []
K = range(1,15)
for k in K:
    km = KMeans(n_clusters=k)
    km = km.fit(x)
    distance.append(km.inertia_)

x_values = list(K)
y_values = distance

# Locate the knee of the (k, inertia) curve, treated as convex and decreasing
kene = KneeLocator(x_values,y_values, curve='convex', direction='decreasing', interp_method='interp1d')
# NOTE(review): this name shadows the builtin breakpoint(); also confirm
# how the script should behave if no knee is found.
breakpoint = kene.knee

# Final clustering with the selected number of clusters
km = KMeans(n_clusters = breakpoint, init = "k-means++", random_state = 17)
clusters = km.fit_predict(x)

# NOTE(review): x[clusters] uses the cluster labels as row indices into x, so
# each row receives the S/O/D values of one of the first rows of x rather
# than its cluster label or centroid -- confirm what "Cluster Values" should hold.
data["Cluster Values"] = list(x[clusters])
Exemplo n.º 18
0
# Lab 7: Seaborn plotting tutorial

import seaborn as sns

sns.set_theme(style='darkgrid', font_scale=3)  # older version of sns: sns.set()
tips = sns.load_dataset('tips')

# Distribution plots: one figure per keyword set, drawn in order.
displot_specs = [
    dict(x='total_bill', col='sex', kind='kde'),
    dict(x='total_bill', kind='kde'),
    dict(x='total_bill', kind='kde', cut=0),
    dict(x='total_bill', stat='density'),
    dict(x='total_bill', y='size', kind='kde'),
    dict(x='total_bill', col='sex', kind='kde'),
]
for spec in displot_specs:
    sns.displot(tips, **spec)

# Relational plots: tip against total bill with increasing use of semantics.
relplot_specs = [
    dict(x='total_bill', y='tip'),
    dict(x='total_bill', y='tip', hue='smoker'),
    dict(x='total_bill', y='tip', hue='smoker', style='sex', s=100),
    dict(x='total_bill', y='tip', size='size', sizes=(15, 200)),
]
for spec in relplot_specs:
    sns.relplot(data=tips, **spec)

# Categorical plots: total bill per day as strip and swarm plots.
catplot_specs = [
    dict(x='day', y='total_bill'),
    dict(x='day', y='total_bill', kind='swarm'),
    dict(x='day', y='total_bill', hue='smoker', kind='swarm'),
]
for spec in catplot_specs:
    sns.catplot(data=tips, **spec)
Exemplo n.º 19
0
# Show the dataset description shipped with the dataset object.
print(iris_dataset.DESCR)

# Correlation heatmap of the data frame's columns.
correlation_matrix = iris.corr()
sns.heatmap(data=correlation_matrix, annot=True, cmap='Greys')

sns.set()
fig, axes = plt.subplots(1, 4, figsize=(20, 5))

features = [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]
target = ['class']

# One strip plot per feature, grouped by the target column.
sns.set_theme(style="ticks")
class_column = target[0]
for axis, feature in zip(axes, features):
    sns.stripplot(ax=axis, x=class_column, y=feature, data=iris)

# Missing-value count per column (only displayed in an interactive session).
iris.isnull().sum()

from sklearn.model_selection import train_test_split

# The first four columns are the inputs, the fifth is the label.
X = iris.iloc[:, :4].values
y = iris.iloc[:, 4].values

split = train_test_split(X, y, test_size=0.25, random_state=0)
X_train, X_test, y_train, y_test = split
Exemplo n.º 20
0
def set_sns_format(width=15, height=6):
    """Apply the seaborn theme and set the default matplotlib figure size.

    :param width: default figure width stored in ``figure.figsize``
    :param height: default figure height stored in ``figure.figsize``
    :return: None
    """
    theme_options = {
        'palette': 'pastel',
        'context': 'notebook',
        'rc': {'savefig.dpi': 300},
    }
    sns.set_theme(**theme_options)
    # Set after the theme call so the figure size is applied last.
    matplotlib.rcParams.update({'figure.figsize': (width, height)})
Exemplo n.º 21
0
def run(_cfg,fout=None,source_data=None):
    """
    Compare adjacency degrees between left/right cell pairs and between datasets.

    Three groups of degree differences are collected:
    * 'Adult L/R': left minus right degree in the N2U dataset
    * 'L4 L/R':    left minus right degree in the JSH dataset
    * 'Adult/L4':  N2U minus JSH degree for every cell of every pair
    Each group is passed to print_wilcoxon, and the differences are drawn as a
    boxplot.

    :param _cfg: path of the config file; its [mat] section must provide
                 left_nodes, right_nodes and lrmap
    :param fout: optional path; when given the figure is saved there
    :param source_data: optional path; when given the plotted table is
                        written there as CSV
    """
    cfg = ConfigParser(interpolation=ExtendedInterpolation())
    cfg.read(_cfg)

    # Cells excluded from both datasets (hard-coded, not read from the config).
    _remove = ['VC01','VD01','VB01','VB02','HSNL','HSNR','PVNL','PVNR','PLNL','PLNR','PVR','PVR.']
    left = aux.read.into_list(cfg['mat']['left_nodes'])
    right = aux.read.into_list(cfg['mat']['right_nodes'])
    lrmap = aux.read.into_lr_dict(cfg['mat']['lrmap'])
    data = []

    n2u = from_db('N2U',adjacency=True,remove=_remove)
    jsh = from_db('JSH',adjacency=True,remove=_remove)
    ndelta,jdelta,bdelta = [],[],[]

    # Left/right comparison within each dataset.
    for label,graph,deltas in (('Adult L/R',n2u,ndelta),('L4 L/R',jsh,jdelta)):
        lnd = get_adj_deg(graph,vertices=left)
        rnd = get_adj_deg(graph,vertices=right)
        for l in sorted(lnd):
            r = lrmap[l]
            diff = lnd[l] - rnd[r]
            data.append([label,l,r,lnd[l],rnd[r],diff])
            deltas.append(diff)

    # Every paired cell (left then its right partner), using the left-side
    # keys returned for the JSH dataset in the last pass of the loop above.
    cells = []
    for n in sorted(lnd):
        cells.extend((n,lrmap[n]))
    bnd = get_adj_deg(n2u,vertices=cells)
    bjd = get_adj_deg(jsh,vertices=cells)
    for c in cells:
        diff = bnd[c] - bjd[c]
        data.append(['Adult/L4',c,c,bnd[c],bjd[c],diff])
        bdelta.append(diff)

    df = pd.DataFrame(data,columns=["Comparison","Cell1","Cell2","Deg1","Deg2","Deg_diff"])
    print('Stats:')
    print_wilcoxon(ndelta,'Adult L/R')
    print_wilcoxon(jdelta,'L4 L/R')
    print_wilcoxon(bdelta,'Adult/L4',alternative="greater")

    sns.set_theme(style="whitegrid")
    fig,ax = plt.subplots(1,1,figsize=(2.15,1.7))
    flierprops = dict(markersize=1,marker='d',markerfacecolor='k')
    medianprops = dict(linestyle='-',linewidth=0.5,color='k')
    capprops = dict(linewidth=0.3)
    sns.boxplot(x="Comparison",y="Deg_diff",
            data=df,width=0.3,ax=ax,linewidth=0.3,color="#a5a5a5",
            flierprops=flierprops,medianprops=medianprops,capprops=capprops)
    ax.set_ylim([-30,30])
    ax.set_yticks([-30,-20,-10,0,10,20,30])
    # Tick.label was removed from Matplotlib; tick_params sets the same sizes.
    ax.tick_params(axis='x',labelsize=7)
    ax.tick_params(axis='y',labelsize=5)
    ax.axhline(0,color='r',linewidth=0.8,linestyle='--')
    ax.set_xlabel("")
    ax.set_ylabel("Degree difference",fontsize=7)
    plt.tight_layout()
    if fout: plt.savefig(fout)
    plt.show()
    if source_data: df.to_csv(source_data,index=False)
# Pairs Trading Strategy

# - Use PCA to reduce dimensionality of the daily returns
# - Then add the company details to do K-means clustering
# - Then conduct time series analysis on each pair in each cluster to find appropriate trading pairs

# Import packages
import pandas as pd
import seaborn as sns
sns.set_theme(style = 'white', context = 'talk')
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn import preprocessing
from scipy import stats
from statsmodels.tsa.stattools import coint
import statsmodels.api as sm
import os
import base64
import warnings
# Silence every warning for the rest of the process (applies to all
# categories and all modules, including the libraries imported above).
warnings.filterwarnings('ignore')

# Set page name and icon
# NOTE(review): 'n.png' is a relative path -- presumably resolved against the
# directory the app is launched from; confirm the file ships with the app.
st.set_page_config(
    page_title = 'Pairs Trading',
    page_icon = 'n.png',
)

# Set page title
Exemplo n.º 23
0
def _render_metric_plots(predictions_test, heading, metric, clip, tall_file,
                         wide_file, unit_range=False):
    """Draw the tall and wide bar charts for one metric column.

    :param predictions_test: performance table; ``metric`` is overwritten in
                             place with its clipped values
    :param heading: markdown heading written above the plots
    :param metric: name of the column to plot
    :param clip: callable applied to every value of the column before plotting
    :param tall_file: download file name for the tall plot
    :param wide_file: download file name for the wide plot
    :param unit_range: when True the value axis is limited to (0, 1)
    """
    with st.markdown(heading):
        # Tall
        predictions_test[metric] = [clip(v) for v in predictions_test[metric]]
        plt.figure(figsize=(3, 9))
        sns.set_theme(style="whitegrid")
        tall_ax = sns.barplot(y=predictions_test.index,
                              x=metric,
                              data=predictions_test)
        if unit_range:
            tall_ax.set(xlim=(0, 1))
    st.markdown(imagedownload(plt, tall_file), unsafe_allow_html=True)

    # Wide
    plt.figure(figsize=(9, 3))
    sns.set_theme(style="whitegrid")
    wide_ax = sns.barplot(x=predictions_test.index,
                          y=metric,
                          data=predictions_test)
    if unit_range:
        wide_ax.set(ylim=(0, 1))
    plt.xticks(rotation=90)
    st.pyplot(plt)
    st.markdown(imagedownload(plt, wide_file), unsafe_allow_html=True)


def build_model(df):
    """Fit the lazy regressors on ``df`` and render the results in Streamlit.

    The last column of ``df`` is the target and all other columns are the
    features. ``split_size`` and ``seed_number`` are read from the enclosing
    module scope. The function writes the dataset summary, the training and
    test performance tables with download links, and tall/wide bar charts for
    R-squared, RMSE and calculation time.

    :param df: input data frame
    """
    df = df.loc[:100]  # FOR TESTING PURPOSE, COMMENT THIS OUT FOR PRODUCTION
    X = df.iloc[:, :-1]  # every column but the last is a feature
    Y = df.iloc[:, -1]  # the last column is the target

    st.markdown('**1.2. Dataset dimension**')
    for caption, part in (('X', X), ('Y', Y)):
        st.write(caption)
        st.info(part.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable (first 20 are shown)')
    st.info(list(X.columns[:20]))
    st.write('Y variable')
    st.info(Y.name)

    # Build lazy model: the first fit scores on the training data itself, the
    # second on the held-out split.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=split_size, random_state=seed_number)
    reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
    _, predictions_train = reg.fit(X_train, X_train, Y_train, Y_train)
    _, predictions_test = reg.fit(X_train, X_test, Y_train, Y_test)

    st.subheader('2. Table of Model Performance')

    st.write('Training set')
    st.write(predictions_train)
    st.markdown(filedownload(predictions_train, 'training.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(predictions_test)
    st.markdown(filedownload(predictions_test, 'test.csv'),
                unsafe_allow_html=True)

    st.subheader('3. Plot of Model Performance (Test set)')

    # Negative R-squared values are shown as 0 and the axis is fixed to 0..1.
    _render_metric_plots(predictions_test,
                         '**R-squared**',
                         "R-Squared",
                         lambda v: 0 if v < 0 else v,
                         'plot-r2-tall.pdf',
                         'plot-r2-wide.pdf',
                         unit_range=True)

    # RMSE values above 50 are shown as 50.
    _render_metric_plots(predictions_test,
                         '**RMSE (capped at 50)**',
                         "RMSE",
                         lambda v: 50 if v > 50 else v,
                         'plot-rmse-tall.pdf',
                         'plot-rmse-wide.pdf')

    # Negative timings are shown as 0.
    _render_metric_plots(predictions_test,
                         '**Calculation time**',
                         "Time Taken",
                         lambda v: 0 if v < 0 else v,
                         'plot-calculation-time-tall.pdf',
                         'plot-calculation-time-wide.pdf')
Exemplo n.º 24
0
# Convert palette entries 2..5 of DESATURATED_PALETTE, plus black, from
# 0-255 integer channels to the 0-1 float triples used for colours below.
palette_colors = [(c[0] / 255.0, c[1] / 255.0, c[2] / 255.0)
                  for c in DESATURATED_PALETTE[2:6] + [(0, 0, 0)]]

# Command line: -d/--dataset selects which example plot to draw (default mpg).
parser = argparse.ArgumentParser()
parser.add_argument("--dataset",
                    "-d",
                    choices=["penguins", "dots", "mpg"],
                    default="mpg")
args = parser.parse_args()

# NOTE(review): Inky() presumably opens the attached display -- confirm;
# saturation, dpi and buf are not used within this excerpt.
inky = Inky()
saturation = 0
dpi = 80
buf = io.BytesIO()

seaborn.set_theme(style="white")

if args.dataset == "mpg":
    # Three-colour palette built from the converted display colours.
    palette = seaborn.color_palette(palette_colors, n_colors=3)
    mpg = seaborn.load_dataset("mpg")

    # Scatter of mpg against horsepower, coloured by origin and sized by weight.
    plot = seaborn.relplot(x="horsepower",
                           y="mpg",
                           hue="origin",
                           size="weight",
                           sizes=(40, 400),
                           alpha=1.0,
                           palette=palette,
                           data=mpg)

if args.dataset == "penguins":
Exemplo n.º 25
0
        'color': 'black',
        'weight': 'normal',
        'verticalalignment': 'bottom'
    }

    plt.figure(figsize=(20, 10))
    plt.xlabel(str(nb_epochs) + ' Epochs', **font)
    plt.ylabel('Accuracy', **font)
    plt.plot(epochs, accuracy, 'r', label='Training acc')
    plt.plot(epochs, val_acc, 'b', label='Validation acc')
    plt.title('Training and validation acc', **title_font)
    plt.legend()
    plt.savefig('../rel/figuras/' + filename + 'acc.png')


# Apply the default seaborn theme to all subsequent plots.
sns.set_theme()

# Maximum length of a sentence
SEQUENCE_MAXLEN = 50

# Load the word2vec embeddings
word2vec_model = KeyedVectors.load_word2vec_format("../data/word2vec_200k.txt")

# Load the datasets (semicolon-separated CSV files)
train = pd.read_csv('../data/train.csv', sep=';')
val = pd.read_csv('../data/val.csv', sep=';')
test = pd.read_csv('../data/test.csv', sep=';')

# Training inputs are the review texts and the targets are the ratings.
x_train = train['review_text'].values
y_train = train['overall_rating'].values
# NOTE(review): word_to_index is defined outside this excerpt -- presumably
# it maps each text to vocabulary indices; confirm.
x_train = word_to_index(x_train)
Exemplo n.º 26
0
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 03 17:04:37 2020

@author: vivek
"""

import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = 6.4, 4.8

import numpy as np
import seaborn as sns
sns.set_theme(style="ticks", palette="pastel")
import altair as alt

# Load the summary table and give its three columns readable names.
csv_path = r'C:\OneDrive\OneDrive-GitHub\Challenges-and-Competitions\TidyTuesday\Data\2020-11-10\summ_data.csv'
summ_data = pd.read_csv(csv_path)
column_labels = ['Country', 'Landline connections', 'Mobile connections']
summ_data.columns = column_labels

# Scatter of mobile against landline connections, one point per country.
ax = summ_data.plot.scatter(
    x='Landline connections',
    y='Mobile connections',
    title='2017: Connections per 100 people (each point represents a country)')
# Inference - there are a lot of countries with more connections than number of people

import geopandas as gpd
from shapely.geometry import Point, Polygon
import adjustText as aT
Exemplo n.º 27
0
    for i in range(2, 4):
        ##Ad ogni ciclo seleziono una dimensione sempre più grande
        pca = PCA(n_components=i)
        pca.fit(X)
        X_pca = pca.transform(X)

        scores = KNNclf(X_pca, y, param_list=param_list)

        print('Nested Cross validation Accuracy: %0.4f (+/- %0.4f)' %
              (scores.mean(), scores.std() * 2))
        CV_scores.append(scores.mean())
        CV_std.append(scores.std())

    #Mettiamo in un grafico i risultati ottenuti
    plt.figure()
    sns.set_theme(style='darkgrid')
    plt.ylabel('CV scores')
    #plt.plot(labels, CV_scores, 'o', color = 'black')
    plt.errorbar(labels, CV_scores, CV_std, fmt='.', color='black')
    plt.show()

    ##Adesso vediamo uan rappresentazione del classificatore in 2d
    X = df2R.values
    #Questa volta splittiamo in test set e train set
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=42,
                                                        stratify=y)
    clf = KNeighborsClassifier(n_neighbors=4, weights='distance')
    h = 0.2
Exemplo n.º 28
0
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import torch

sns.set()

sns.set_theme(style="darkgrid")
if __name__ == '__main__':
    # Reward arrays saved for the convolutional model on the train/test levels.
    conv_rewards_train = torch.load(
        'ckpt_train/conv_test_rewards.ckpt').numpy()
    conv_rewards_test = torch.load('ckpt_test/conv_test_rewards.ckpt').numpy()
    print(conv_rewards_train.mean())

    # Same for the ViT model.
    vit_rewards_train = torch.load('ckpt_train/vit_test_rewards.ckpt').numpy()
    vit_rewards_test = torch.load('ckpt_test/vit_test_rewards.ckpt').numpy()
    print(vit_rewards_train.mean())

    # Level ids 0..49 repeated twice: once for each model.
    levels = list(range(50)) * 2

    convs = ['conv'] * 50
    vits = ['vit'] * 50

    # Model label and reward entry for each of the 100 rows.
    model_names = convs + vits
    models_mean = list(conv_rewards_train[:50]) + list(vit_rewards_train[:50])

    # #FF0000
    # #ffcc66
Exemplo n.º 29
0
"""
Dot plot with several variables
===============================

_thumb: .3, .3
"""
import seaborn as sns
sns.set_theme(style="whitegrid")

# Load the dataset
crashes = sns.load_dataset("car_crashes")

# Make the PairGrid: rows ordered by descending total, one column per
# variable except the last three, all sharing the "abbrev" y variable
ranked_crashes = crashes.sort_values("total", ascending=False)
g = sns.PairGrid(
    ranked_crashes,
    x_vars=crashes.columns[:-3],
    y_vars=["abbrev"],
    height=10,
    aspect=.25,
)

# Draw a dot plot using the stripplot function
dot_style = dict(
    size=10,
    orient="h",
    palette="flare_r",
    linewidth=1,
    edgecolor="w",
)
g.map(sns.stripplot, **dot_style)

# Use the same x axis limits on all columns and add better labels
g.set(xlim=(0, 25), xlabel="Crashes", ylabel="")

# Use semantically meaningful titles for the columns
Exemplo n.º 30
0
import os
from pathlib import Path
import numpy as np
import pandas as pd

from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator
import seaborn as sns

# Resolve the script path before walking up two directories: with a relative
# __file__ the unresolved path may have fewer than three parents (IndexError)
# or yield a location that depends on the current working directory.
_project_root = Path(__file__).resolve().parents[2]
run_dir = _project_root / "runs"
overleaf_dir = _project_root / "overleaf" / "figures"
# "talk" context with white-grid axes for all figures.
sns.set_theme(context="talk", style="whitegrid")


def tabulate_events(dpath):
    summary_iterators = [
        EventAccumulator(os.path.join(dpath, dname)).Reload()
        for dname in os.listdir(dpath)
    ]

    tags = summary_iterators[0].Tags()["scalars"]

    for it in summary_iterators:
        assert it.Tags()["scalars"] == tags

    out = defaultdict(list)
    steps = []

    for tag in tags:
        steps = [e.step for e in summary_iterators[0].Scalars(tag)]