Пример #1
0
def generate_align_vs_year(data_frame, output_folder, file_name):
    """
    Draw a line chart of character counts per year, coloured by alignment,
    and write it to ``<output_folder>/figures/<file_name>.png``.

    The image is rendered with the selenium method using a ``driver`` object
    defined outside this function. When the outer ``verbose`` flag is truthy
    a confirmation line is printed.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart encodes its 'year' and 'align' fields
    output_folder : str
        output folder path; the image goes into its "figures" sub-folder
    file_name : str
        file name (without extension) for generated chart image

    Returns:
    -----------
    None
    """
    base = alt.Chart(data_frame, title="Alignment over Time")
    year_axis = alt.X('year', title='Year(1935-2013)',
                      axis=alt.Axis(format='t'))
    count_axis = alt.Y('count()', title="Character Count")
    align_colour = alt.Color("align", title="Alignment")

    align_vs_year = base.mark_line().encode(
        year_axis,
        y=count_axis,
        color=align_colour,
    ).properties(height=300, width=500)

    # Build the destination once; it is used for saving and for the message.
    target_path = output_folder + "/figures/" + file_name + '.png'
    save(align_vs_year, target_path, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs year chart created, saved to " + target_path)
def save_figures(figures: dict, save_dir: str, ext: str = 'svg'):
    """Write every figure of a mapping into one directory

    Parameters
    ----------
    figures : dict
        mapping of {name: Altair figure obj}
    save_dir : str
        destination directory, created (including parents) when missing;
        each figure is written to save_dir / name.ext
    ext : str
        extension to save figures with

    Raises
    ------
    Any error raised while saving a figure is logged and re-raised, so the
    remaining figures are not attempted.
    """
    out_dir = Path(save_dir)

    # make sure the destination exists before anything is written
    if not out_dir.exists():
        out_dir.mkdir(parents=True)
        log.info(f'Created dir: {out_dir}')

    for fig_name, fig_obj in figures.items():
        target = out_dir / f'{fig_name}.{ext}'

        try:
            save(fig_obj, str(target))
        except BaseException:
            # report which file failed, then let the error propagate
            log.error(f'Could not save figure at: {target}')
            raise
Пример #3
0
 def tune_regression_tree(self,
                          sample_size,
                          hparams=['max_features', 'max_depth'],
                          folder="."):
     """
     Sweep regression-tree hyperparameters and save one accuracy plot per
     swept parameter.

     Input: int
         sample_size -   Number of training runs per candidate value. More
                         samples take proportionally more time.
     Optional Input: list of strings, String
         hparams     -   Which parameters to sweep. Recognised entries:
                         ["max_features", "max_depth"]
                         Both are swept by default. The list is only read.
         folder      -   Folder the plots are written to (defaults to the
                         current folder). Do not include a trailing slash.
     Output:
         Writes "regression_tree_features.html" and/or
         "regression_tree_depth.html" into `folder`.
     """
     if 'max_features' in hparams:
         x_label = 'Number of Features'
         scores = pd.DataFrame(columns=[x_label, 'test_acc', 'train_acc'])
         # Candidates run from self._n + 3 up to (not including) the number
         # of feature columns, in steps of self._n.
         candidates = range(self._n + 3, self._features.shape[1], self._n)
         trials = (n_feat for n_feat in candidates
                   for _ in range(sample_size))
         for row, n_feat in enumerate(trials):
             # Depth is held at 10 while the feature count varies.
             train_acc, test_acc = self.train_regression_tree(
                 max_features=n_feat, max_depth=10, model_get=False)
             scores.loc[row] = pd.Series({
                 x_label: n_feat,
                 'train_acc': train_acc,
                 'test_acc': test_acc
             })
         save(self._plot_accuracy(scores, x_label),
              folder + "/regression_tree_features.html")
     if 'max_depth' in hparams:
         x_label = 'Number of Levels Deep'
         scores = pd.DataFrame(columns=[x_label, 'test_acc', 'train_acc'])
         # Depths 2, 4, ..., 18.
         trials = (depth for depth in range(2, 20, 2)
                   for _ in range(sample_size))
         for row, depth in enumerate(trials):
             train_acc, test_acc = self.train_regression_tree(
                 max_depth=depth, model_get=False)
             scores.loc[row] = pd.Series({
                 x_label: depth,
                 'train_acc': train_acc,
                 'test_acc': test_acc
             })
         save(self._plot_accuracy(scores, x_label),
              folder + "/regression_tree_depth.html")
Пример #4
0
def test_save_chart_method(
    spec: JSONDict, fmt: str, method: Union[str, Type[Saver]]
) -> None:
    """Check that ``save`` honours the format support of the chosen method.

    ``method`` is either the name "node" / "selenium" or a saver class. A
    format listed under the saver's "vega-lite" formats must save and pass
    ``check_output``; any other format must make ``save`` raise ValueError.
    """
    # png/pdf output is binary, everything else is text
    fp: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ["png", "pdf"] else io.StringIO()
    )

    # Resolve the saver class whose format table applies.
    if method == "node":
        saver_cls = NodeSaver
    elif method == "selenium":
        saver_cls = SeleniumSaver
    elif isinstance(method, type):
        saver_cls = method
    else:
        raise ValueError(f"unrecognized method: {method}")
    supported = saver_cls.valid_formats["vega-lite"]

    if fmt in supported:
        save(spec, fp, fmt=fmt, method=method)
        check_output(fp.getvalue(), fmt)
        return

    with pytest.raises(ValueError):
        save(spec, fp, fmt=fmt, method=method)
Пример #5
0
 def tune_neural_net(self,
                     sample_size,
                     hparams=['max_iter', 'hidden_layer_sizes'],
                     folder="."):
     """
     Sweep neural-net hyperparameters and save one accuracy plot per swept
     parameter.

     Input: int
         sample_size -   Number of training runs per candidate value. More
                         samples take proportionally more time.
     Optional Input: list of strings, String
         hparams     -   Which parameters to sweep. Recognised entries:
                         ["max_iter", "hidden_layer_sizes"]
                         Both are swept by default. The list is only read.
         folder      -   Folder the plots are written to (defaults to the
                         current folder). Do not include a trailing slash.
     Output:
         Writes "neural_net_iterations.html" and/or
         "neural_net_layers.html" into `folder`.
     """
     if 'max_iter' in hparams:
         x_label = 'Number of Iterations'
         scores = pd.DataFrame(columns=[x_label, 'test_acc', 'train_acc'])
         # Iteration caps 200, 400, ..., 2000.
         trials = (n_iter for n_iter in range(200, 2001, 200)
                   for _ in range(sample_size))
         for row, n_iter in enumerate(trials):
             train_acc, test_acc = self.train_neural_net(max_iter=n_iter,
                                                         model_get=False)
             scores.loc[row] = pd.Series({
                 x_label: n_iter,
                 'train_acc': train_acc,
                 'test_acc': test_acc
             })
         save(self._plot_accuracy(scores, x_label),
              folder + "/neural_net_iterations.html")
     if 'hidden_layer_sizes' in hparams:
         x_label = 'Hidden Layer Configuration'
         scores = pd.DataFrame(columns=[x_label, 'test_acc', 'train_acc'])
         layouts = [(50, ), (100, ), (50, 50), (100, 50), (100, 100),
                    (100, 50, 50), (100, 100, 50), (100, 100, 100)]
         trials = (layout for layout in layouts
                   for _ in range(sample_size))
         for row, layout in enumerate(trials):
             train_acc, test_acc = self.train_neural_net(
                 hidden_layer_sizes=layout, model_get=False)
             scores.loc[row] = pd.Series({
                 x_label: layout,
                 'train_acc': train_acc,
                 'test_acc': test_acc
             })
         save(self._plot_accuracy(scores, x_label),
              folder + "/neural_net_layers.html")
Пример #6
0
def test_embed_options_save_html(spec: JSONDict, inline: bool,
                                 embed_options: JSONDict) -> None:
    """Embed options set on the Altair renderer must appear in saved HTML.

    The options are applied through ``alt.renderers.set_embed_options`` and
    the test looks for them, JSON-encoded, in the ``embedOpt`` assignment of
    the generated page. Falsy options are expected to serialize as ``{}``.
    """
    buffer = io.StringIO()
    with alt.renderers.set_embed_options(**embed_options):
        save(spec, buffer, "html", inline=inline)

    expected_snippet = f"const embedOpt = {json.dumps(embed_options or {})};"
    assert expected_snippet in buffer.getvalue()
Пример #7
0
def generate_align_vs_features(data_frame, output_folder, file_name):
    """
    Draw a grid of circle charts (three per row), one per categorical
    feature, showing character counts coloured by alignment, and write it to
    ``<output_folder>/figures/<file_name>.png``.

    The image is rendered with the selenium method using a ``driver`` object
    defined outside this function. When the outer ``verbose`` flag is truthy
    a confirmation line is printed.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart encodes 'align' plus each repeated feature
    output_folder : str
        output folder path; the image goes into its "figures" sub-folder
    file_name : str
        file name (without extension) for generated chart image

    Returns:
    -----------
    None
    """
    features = ['id', 'eye', 'hair', 'sex', 'gsm', 'publisher']

    # One panel definition; `alt.repeat()` is filled in per feature below.
    panel = alt.Chart(data_frame).mark_circle().encode(
        alt.Y(alt.repeat(), type='ordinal'),
        alt.X('count()', title="Character Count"),
        size=alt.Size('count()', legend=alt.Legend(title="Characters")),
        color=alt.Color("align", legend=alt.Legend(title="Alignment")),
    ).properties(height=300, width=200)
    align_vs_features = panel.repeat(repeat=features, columns=3)

    target_path = output_folder + "/figures/" + file_name + '.png'
    save(align_vs_features, target_path, method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs features chart created, saved to " + target_path)
Пример #8
0
def test_infer_format(spec: JSONDict) -> None:
    """Saving to an open ``.html`` file without ``fmt`` must produce HTML.

    No format is passed to ``save``; the test only checks that what ends up
    in the file starts with an HTML doctype.
    """
    with temporary_filename(suffix=".html") as html_path:
        # Close the writer before reading so the content is flushed.
        with open(html_path, "w") as sink:
            save(spec, sink)
        with open(html_path, "r") as source:
            contents = source.read()
    assert contents.strip().startswith("<!DOCTYPE html>")
Пример #9
0
def generate_align_vs_appearances(data_frame, output_folder, file_name):
    """
    Draw a box plot of appearances per alignment and write it to
    ``<output_folder>/figures/<file_name>.png``.

    Rows containing missing values are dropped before plotting. The image is
    rendered with the selenium method using a ``driver`` object defined
    outside this function. When the outer ``verbose`` flag is truthy a
    confirmation line is printed.

    Parameters:
    -----------
    data_frame : pandas.DataFrame
        data to plot; the chart encodes 'appearances' and 'align'
    output_folder : str
        output folder path; the image goes into its "figures" sub-folder
    file_name : str
        file name (without extension) for generated chart image

    Returns:
    -----------
    None
    """
    complete_rows = data_frame.dropna()
    base = alt.Chart(complete_rows,
                     title="Character Appearances by Alignment")
    boxes = base.mark_boxplot().encode(
        alt.X('appearances:Q', title='Appearances'),
        y=alt.Y('align:O', title="Alignment"),
        color=alt.Color("align", title="Alignment"),
        size='count()',
    ).properties(height=300, width=500)
    align_vs_appearances = boxes.interactive()

    target_path = output_folder + "/figures/" + file_name + '.png'
    save(align_vs_appearances, target_path,
         method='selenium', webdriver=driver)
    if verbose:
        print("Alignment vs appearances chart created, saved to "
              + target_path)
Пример #10
0
def animate_bezier(
    xs: np.ndarray,
    ys: np.ndarray,
    step: float,
    filename: str = "bezier",
    download_folder: str = expanduser("~") + "/Downloads/",
) -> None:
    """Build an interactive Bezier-curve chart and save it as HTML.

    Parameters
    ----------
    xs, ys : np.ndarray
        Coordinates of the input points; they are stacked into an (n, 2)
        array of points before being handed to ``create_df_dict``.
    step : float
        Spacing of the curve parameter ``t`` on [0, 1].
    filename : str
        Name of the output file, without the ".html" extension.
    download_folder : str
        Folder the HTML file is written to.

    Returns
    -------
    None
        The chart is written to ``{download_folder}/{filename}.html``.
    """
    ts = np.arange(0, 1 + step, step)
    # Round away float accumulation error from arange. `np.round` is used
    # because the `np.round_` alias no longer exists in NumPy 2.0.
    ts = np.round(ts, decimals=num_decimals(step))

    # One hex colour ("#rrggbb") per input point, from the "rocket" palette.
    colours = [
        "#%02x%02x%02x"
        % (int(rgb[0] * 255), int(rgb[1] * 255), int(rgb[2] * 255))
        for rgb in color_palette("rocket", len(xs))
    ]

    dfs = create_df_dict(np.vstack((xs, ys)).T, ts)
    chart = create_visualisation(dfs, ts, colours)

    # Overlay a faint segment between each pair of consecutive rows of
    # dfs[1]. Separate names are used so the xs/ys arguments stay intact.
    curve = dfs[1]
    for i in range(len(curve) - 1):
        start = curve.iloc[i]
        end = curve.iloc[i + 1]
        seg_xs = np.hstack((start.loc["x"], end.loc["x"]))
        seg_ys = np.hstack((start.loc["y"], end.loc["y"]))
        chart += draw_curve_segment(seg_xs, seg_ys, colours[0], 0.2)

    chart = chart.interactive()

    save(chart, f"{download_folder}/{filename}.html")
Пример #11
0
def graph_tp(data):
    """Plot throughput against client count and save the chart as a PNG.

    Parameters
    ----------
    data :
        Throughput values, one per entry of the outer ``num_clients``
        sequence (both become columns of the same DataFrame).

    Returns
    -------
    None
        The line chart is written to "tmp/sync_hotstuff_throughput.png",
        relative to the current working directory.
    """
    out_path = "tmp/sync_hotstuff_throughput.png"
    # The column name doubles as the y-axis title of the chart.
    y_label = 'Throughput (Operations/Second)'
    frame = pd.DataFrame({'Number of Clients': num_clients, y_label: data})
    chart = alt.Chart(frame).mark_line().encode(
        x='Number of Clients:Q', y=f'{y_label}:Q')
    save(chart, out_path)
Пример #12
0
def graph_lat(data):
    """Plot latency against client count and save the chart as a PNG.

    Parameters
    ----------
    data :
        Latency values, one per entry of the outer ``num_clients`` sequence
        (both become columns of the same DataFrame).

    Returns
    -------
    None
        The line chart is written to "tmp/sync_hotstuff_latency.png",
        relative to the current working directory.
    """
    out_path = "tmp/sync_hotstuff_latency.png"
    frame = pd.DataFrame({'Number of Clients': num_clients, 'Latency': data})
    chart = alt.Chart(frame).mark_line().encode(
        x='Number of Clients:Q', y='Latency:Q')
    save(chart, out_path)
Пример #13
0
def test_embed_options_save_html_override(spec: JSONDict) -> None:
    """Explicit ``embed_options`` must win over renderer-level options.

    Renderer options are set to ``{"padding": 20}`` while ``save`` receives
    ``{"renderer": "svg"}``; the saved page is expected to contain only the
    latter in its ``embedOpt`` assignment.
    """
    renderer_level: JSONDict = {"padding": 20}
    explicit: JSONDict = {"renderer": "svg"}

    buffer = io.StringIO()
    with alt.renderers.set_embed_options(**renderer_level):
        save(spec, buffer, "html", embed_options=explicit)

    expected_snippet = f"const embedOpt = {json.dumps(explicit)};"
    assert expected_snippet in buffer.getvalue()
Пример #14
0
def draw_rerand_trajectory(source):
    """Draw per-name trajectories over time and save them to "chart.html".

    The chart layers four marks that share x='time' and y='name':
    dashed lines per name, grey circles for every row, larger black circles
    for rows whose 'ub' column equals 1, and a text label showing 'leaks'.

    Parameters
    ----------
    source : pandas.DataFrame
        Must provide the columns 'name', 'time', 'leaks' and 'ub'.

    Returns
    -------
    None
    """
    # Rows flagged as upper bounds get an extra, more prominent marker.
    upperbounds = source[source['ub'] == 1][['name', 'time', 'leaks']]

    lines = alt.Chart(
        source,
        height=600,
        width=900,
    ).mark_line(strokeDash=[5, 4]).encode(
        y='name',
        # The x channel is declared with alt.X so the channel class matches
        # the encoding it is assigned to.
        x=alt.X('time', title='time (second)', axis=alt.Axis(tickMinStep=0.5)),
        color=alt.Color('name', legend=None))

    circles = alt.Chart(source).mark_circle(
        color='lightslategray',
        opacity=1.0,
        size=80.0).encode(y='name', x='time')

    circles2 = alt.Chart(upperbounds).mark_circle(
        color='black',
        opacity=1.0,
        size=100.0,
    ).encode(y='name', x='time')

    # Text is nudged up and slightly left so it sits above its marker.
    annotation = alt.Chart(source).mark_text(align='left',
                                             baseline='middle',
                                             opacity=1.0,
                                             fontSize=18,
                                             dx=-3.5,
                                             dy=-13.0).encode(x='time',
                                                              y='name',
                                                              text='leaks')

    chart = alt.layer(
        lines, circles, circles2,
        annotation).configure_view(stroke='transparent').configure_axis(
            labelFontSize=22,
            titleFontSize=22,
            tickCount=20)
    save(chart, "chart.html", scale_factor=2.0)
Пример #15
0
def validation_plot():
    """Build the validation figures from the etcd result files.

    Reads one result file per (rate, cluster size, repeat) combination,
    using the outer ``repeats`` count, and returns early without plotting
    when ``files_exist`` reports that the set is incomplete. Two PDFs are
    written:

    * "figures/validation_rate_latency.pdf" - one achieved-rate panel per
      repeat, stacked vertically.
    * "figures/validation_cdf.pdf" - latency CDF panels for the runs at
      rate 10000, one panel per repeat.
    """
    print("Plotting validation")

    rates = [
        1, 2000, 4000, 6000, 8000, 10000, 12000, 14000, 16000, 18000,
        20000, 22000, 24000, 26000, 28000, 30000
    ]
    cluster_sizes = [3, 5, 7, 9]
    # Nesting order: rate, then cluster size, then repeat.
    runs = [(rate, n_servers, repeat)
            for rate in rates
            for n_servers in cluster_sizes
            for repeat in range(repeats)]

    def res_path(rate, n_servers, repeat):
        # Result file name for a single run.
        return f"results/res_etcd.simple.go.none.nn_{n_servers}.nc_1.write_ratio_1.mtbf_1.rate_{rate}.duration_60.tag_repeat-{repeat}.res"

    if not files_exist([res_path(*run) for run in runs]):
        return

    # Rate Latency
    frames = [
        pf.read_in_res(res_path(rate, n_servers, repeat), {
            'repeat': repeat,
            'rate': rate,
            'n_servers': n_servers
        }) for rate, n_servers, repeat in runs
    ]
    validation_data = pd.concat(frames, ignore_index=True)

    rate_latency = achieved_rate_preprocess(validation_data)

    stacked = alt.vconcat()
    for repeat in range(repeats):
        panel = achieved_rate_plot(
            rate_latency[rate_latency['repeat'] == repeat], 100)
        stacked = alt.vconcat(stacked, panel)
    alts.save(stacked, "figures/validation_rate_latency.pdf")

    # CDF
    at_10k = validation_data[validation_data['rate'] == 10000]

    group_by = ['n_servers', 'repeat']
    cdfs = pd.concat([
        process(group, dict(zip(group_by, params)), lambda group: cdf(group))
        for params, group in at_10k.groupby(group_by)
    ])

    latency_axis = alt.X('latency:Q',
                         axis=alt.Axis(title='Latency (ms)'),
                         scale=alt.Scale(domain=[0, 100]))
    fraction_axis = alt.Y('percentile:Q',
                          axis=alt.Axis(title='Cumulative fraction'))

    stacked = alt.vconcat()
    # NOTE(review): the loop variable is not used in the body, so every
    # panel is drawn from the full `cdfs` frame - confirm whether filtering
    # by repeat was intended.
    for _ in range(repeats):
        panel = alt.Chart(cdfs).mark_line(clip=True).encode(
            x=latency_axis,
            y=fraction_axis,
            color=alt.Color('n_servers:N', legend=None),
        ).properties(height=130)
        stacked = alt.vconcat(stacked, panel)

    alts.save(stacked, "figures/validation_cdf.pdf")
Пример #16
0
def test_save_spec(spec: JSONDict, fmt: str) -> None:
    """Save a raw spec in ``fmt`` and validate what was written."""
    # png/pdf are written as bytes; every other format is text
    buffer: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ["png", "pdf"] else io.StringIO()
    )

    save(spec, buffer, fmt=fmt)
    check_output(buffer.getvalue(), fmt)
Пример #17
0
def test_save_chart(chart: alt.TopLevelMixin, fmt: str) -> None:
    """Save an Altair chart object in ``fmt`` and validate the output."""
    # png/pdf are written as bytes; every other format is text
    buffer: Union[io.BytesIO, io.StringIO] = (
        io.BytesIO() if fmt in ["png", "pdf"] else io.StringIO()
    )

    save(chart, buffer, fmt=fmt)
    check_output(buffer.getvalue(), fmt)
Пример #18
0
def test_html_inline(spec: JSONDict, inline: bool) -> None:
    """Saved HTML references the jsdelivr CDN exactly when not inlined."""
    buffer = io.StringIO()
    save(spec, buffer, fmt="html", inline=inline)

    uses_cdn = "https://cdn.jsdelivr.net" in buffer.getvalue()
    if inline:
        assert not uses_cdn
    else:
        assert uses_cdn
def save_altair_plot(fig: alt.Chart) -> None:
    """Render ``fig`` to a PNG inside the ``FIGURES`` location.

    The file name comes from ``png_name(fig)``; rendering uses the selenium
    method at a scale factor of 6.
    """
    fname = png_name(fig)
    target = str(FIGURES.joinpath(fname))

    render_options = {
        "fmt": "png",
        "method": "selenium",
        "scale_factor": 6.0,
    }
    save(chart=fig, fp=target, **render_options)
Пример #20
0
def save_altair(fig, name, driver, path=fig_path):
    """Write an Altair figure as a PNG and as a standalone HTML file.

    Parameters
    ----------
    fig :
        Altair chart; must also provide a ``save`` method for the HTML copy.
    name : str
        Base file name, without extension.
    driver :
        Selenium webdriver passed on for PNG rendering.
    path :
        Root folder; files go to ``<path>/png/`` and ``<path>/html/``.
    """
    print(path)

    png_target = f"{path}/png/{name}.png"
    save(fig, png_target, method="selenium", webdriver=driver,
         scale_factor=5)

    html_target = f"{path}/html/{name}.html"
    fig.save(html_target)
Пример #21
0
def test_save_chart_data_warning(chart: alt.TopLevelMixin) -> None:
    """``save`` must warn exactly once under the 'json' data transformer."""
    buffer = io.StringIO()
    with alt.data_transformers.enable("json"), \
            pytest.warns(UserWarning) as caught:
        save(chart, buffer, fmt="html")

    assert len(caught) == 1
    warning_text = caught[0].message.args[0]
    assert warning_text.startswith(
        "save() may not function properly with the 'json' data transformer"
    )
Пример #22
0
def save_chart(
    chart: alt.Chart, base_path: Union[Path, str], filetypes: List[str], method=None
):
    """Save ``chart`` once per file type as ``<base_path>.<type>``.

    Parameters
    ----------
    chart : alt.Chart
        Chart to save.
    base_path : Path or str
        Output path without extension.
    filetypes : list of str
        Extensions to write, e.g. ["png", "svg"].
    method : optional
        Only "node" has an effect: it is used for "svg" and "pdf" outputs.
        Every other combination lets altair_saver pick its default method.
    """
    base_path = str(base_path)
    for t in filetypes:
        path = base_path + "." + t
        # Decide per file type without overwriting the caller's `method`;
        # otherwise a non-svg/pdf type would disable "node" for the rest.
        if method == "node" and t in ("svg", "pdf"):
            type_method = "node"
        else:
            type_method = None
        altair_saver.save(chart, safe_file(path), method=type_method)
Пример #23
0
def push_report(service):
    """Build the monthly spending report image and e-mail it to the user.

    For every company in the outer ``params`` mapping that is not in
    ``IGNORE_LIST`` and for which ``create_dump`` succeeds, the monthly
    amounts are loaded and charted. All charts, preceded by a stacked
    aggregate chart, are saved to "data/report.png" next to this file, and
    the rendered "report.html" template is sent through ``push_email``.

    Parameters
    ----------
    service :
        Mail API client; used to look up the profile address of user 'me'
        and passed on to ``create_dump``.

    Returns
    -------
    None
        Returns early, after printing a message, when no company produced
        any data.
    """
    user_email = service.users().getProfile(
        userId='me').execute()['emailAddress']
    print(user_email)
    y_bottom = np.zeros(12)  # running total per month across companies
    charts = []
    frames = []
    for company_name in params:
        company = params[company_name]
        if company_name in IGNORE_LIST or not create_dump(service, company):
            continue
        try:
            x, y = getData(company_name, company['fname'])
            x, y = coerceData(x, y)
        except Exception as e:
            # Best effort: a company whose data cannot be read is skipped.
            print(e)
            continue
        print(company_name, y)
        y_bottom = np.add(y_bottom, y)
        source = pd.DataFrame({
            'month': x,
            'spent': y,
            'company': [company_name] * 12
        })
        chart = alt.Chart(source).mark_bar(size=15).encode(
            x=alt.X('month', title=''),
            y=alt.Y('spent',
                    title='Amount spent (₹)')).properties(title=company_name, )
        charts.append(chart)
        frames.append(source)
    if not frames:
        print('No data found for ' + user_email)
        return
    # Concatenate once at the end; DataFrame.append is gone in pandas 2.0.
    combined = pd.concat(frames)
    stackedchart = alt.Chart(combined).mark_bar(size=15).encode(
        alt.X('month', title=''),
        y=alt.Y('sum(spent)', title='Amount spent (₹)'),
        color='company').properties(title='Aggregate Monthly Spending')
    charts.insert(0, stackedchart)
    repchart = alt.VConcatChart(vconcat=charts)
    save(repchart,
         os.path.dirname(os.path.abspath(__file__)) + '/data/report.png',
         scale_factor=1.5)
    with app.app_context():
        context = {'amount': np.sum(y_bottom)}
        email_content = render_template('report.html', **context)
    push_email(email_content, user_email)
Пример #24
0
def alt_to_latex(chart,
                 label,
                 caption,
                 path_tex,
                 path_figure,
                 path_figure_reporting='figures/charts',
                 width='width=0.8\\textwidth',
                 position='h'):
    """Save ``chart`` at the path returned by ``create_tex``.

    All arguments except ``chart`` are forwarded unchanged to
    ``create_tex``; the path it returns (not the ``path_figure`` argument
    itself) is where the chart is saved.

    Returns
    -------
    The ``chart`` that was passed in.
    """
    figure_target = create_tex(
        label,
        caption,
        path_tex,
        path_figure,
        path_figure_reporting,
        width,
        position,
    )
    save(chart, figure_target)
    return chart
Пример #25
0
def save_plots(output_dir, plots_dict):
    """Save every plot of ``plots_dict`` with a fresh Chrome webdriver.

    Parameters
    ----------
    output_dir : str
        Path prefix for the output files. Each file name is appended
        directly (``output_dir + name``), so a directory needs its trailing
        separator. The directory part of the prefix is created if missing.
    plots_dict : dict
        Mapping of {file name: chart}.

    Returns
    -------
    None
        Failures are printed per plot and do not stop the remaining plots.
    """
    # exist_ok avoids failing when the directory part is already there, and
    # an empty directory part (bare file-name prefix) needs no creation.
    parent = os.path.dirname(output_dir)
    if parent:
        os.makedirs(parent, exist_ok=True)

    for k, v in plots_dict.items():
        driver = None
        try:
            driver = webdriver.Chrome(ChromeDriverManager().install())
            save(v, output_dir + k, method='selenium', webdriver=driver)
            print("Successfully saved {}".format(k))
        except Exception as e:
            print(e)
        finally:
            # Close the browser started for this plot so it does not leak.
            if driver is not None:
                driver.quit()
def main(input_file, out_dir):
    """Create the line and ridgeline plots and save them as PNG files.

    Parameters
    ----------
    input_file : str
        Path of the CSV file to read.
    out_dir : str
        Directory the images are written to; created if it does not exist.

    Returns
    -------
    None
        Writes "line_plot.png" and "ridgeline_plot.png" into ``out_dir``
        using the selenium method and the outer ``driver`` object.
    """
    covid = pd.read_csv(input_file)

    line_plot = create_line_plot(covid)
    ridgeline_plot = create_ridgeline_plot(covid)

    # Create the output directory up front instead of saving, catching any
    # failure and retrying, which would hide unrelated errors.
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for plot, stem in ((line_plot, "line_plot"),
                       (ridgeline_plot, "ridgeline_plot")):
        save(plot,
             f"{out_dir}/{stem}.png",
             method='selenium',
             webdriver=driver)
Пример #27
0
def save_plots(output_dir, plots_dict):
    """Save every plot of ``plots_dict`` using a headless Chrome webdriver.

    Parameters
    ----------
    output_dir : str
        Directory the files are written to (``output_dir + "/" + name``).
    plots_dict : dict
        Mapping of {file name: chart}.

    Returns
    -------
    None
        Failures are printed per plot and do not stop the remaining plots.
    """
    for k, v in plots_dict.items():
        driver = None
        try:
            chrome_options = Options()
            chrome_options.add_argument("--headless")
            chrome_options.add_argument("--disable-gpu")
            chrome_options.add_argument("--no-sandbox")
            chrome_options.add_argument("start-maximized")
            chrome_options.add_argument("disable-infobars")
            chrome_options.add_argument("--disable-extensions")
            driver = webdriver.Chrome(options=chrome_options)
            save(v, output_dir + "/" + k, webdriver=driver)
            print("Successfully saved {}".format(k))
        except Exception as e:
            print(e)
        finally:
            # Close the browser started for this plot so it does not leak.
            if driver is not None:
                driver.quit()
Пример #28
0
def save_plot(plot, out, plot_name):
    """Save the plot object as ``<out>/eda_<plot_name>.png``.

    Args:
        plot (alt.Chart): plot object to save
        out (string): output directory, created if it does not exist
        plot_name (string): name of the plot to be included in the filename
    """
    # exist_ok makes this safe if the directory appears between a check and
    # the creation.
    os.makedirs(out, exist_ok=True)

    file_name = f'{out}/eda_{plot_name}.png'
    driver = webdriver.Chrome()
    try:
        save(plot, file_name, method='selenium', webdriver=driver)
    finally:
        # Always close the browser, even when saving fails.
        driver.quit()
Пример #29
0
def pdf_download(file, dia):
    """Save ``dia`` to ``file`` and render a download button for it.

    Parameters
    ----------
    file : str
        Path the chart is saved to. Its directory is created when missing
        and an existing file at that path is removed first.
    dia :
        Chart object handed to ``save``.

    Returns
    -------
    None
        The button markup from ``dow.download_button`` is rendered through
        ``st.markdown``; the download is named ``Config.pdf_name``.
    """
    # Use the filesystem API instead of shell commands so paths containing
    # spaces or shell metacharacters are handled literally.
    save_dir = os.path.dirname(file)
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    if os.path.isfile(file):
        os.remove(file)

    save(dia, file)
    filename = Config.pdf_name
    with open(file, 'rb') as f:
        s = f.read()
    download_button_str = dow.download_button(s, filename,
                                              'Click here to download PDF')
    st.markdown(download_button_str, unsafe_allow_html=True)
Пример #30
0
def compare_models(weather, folder):
    '''
    Train each model type ten times and save a box plot comparing their
    test accuracy (R^2) as "model_comparison.html".

    The hyperparameters are fixed in the code: regression tree with
    max_features=35 and max_depth=8, neural net with max_iter=500 and three
    hidden layers of 100 units.

    Input:
        weather -   Instance providing train_linear_regressor,
                    train_regression_tree and train_neural_net; element [1]
                    of each result is recorded as the test accuracy
        folder  -   Path of the folder the graph should be stored in
                    Should not include the final slash
    Output:
        Will store the graph in the folder specified
    '''
    # (label, trainer) pairs, run in this order within every round.
    trainers = (
        ('Linear Regression',
         lambda: weather.train_linear_regressor(model_get=False)),
        ('Decision Tree Regression',
         lambda: weather.train_regression_tree(max_features=35,
                                               max_depth=8,
                                               model_get=False)),
        ('Neural Network',
         lambda: weather.train_neural_net(max_iter=500,
                                          hidden_layer_sizes=(100, 100, 100),
                                          model_get=False)),
    )

    scores = pd.DataFrame(columns=['Model', 'test_acc'])
    for round_no in range(10):
        for slot, (label, train) in enumerate(trainers):
            outcome = train()
            scores.loc[round_no * len(trainers) + slot] = pd.Series({
                'Model': label,
                'test_acc': outcome[1]
            })

    # Create graph: the y-axis spans the observed accuracies, floored to two
    # decimals, with 0.01 of headroom at the top.
    lowest = floor(scores['test_acc'].min() * 100) / 100.0
    highest = floor(scores['test_acc'].max() * 100) / 100.0 + 0.01
    accuracy_axis = alt.Y('test_acc:Q',
                          axis=alt.Axis(title='Accuracy (R^2)'),
                          scale=alt.Scale(domain=[lowest, highest]))
    test_chart = alt.Chart(scores).mark_boxplot(size=100).encode(
        x='Model',
        y=accuracy_axis,
    ).properties(height=400, width=800, title='Accuracy per Model')
    save(test_chart, folder + "/" + "model_comparison.html")