Пример #1
0
def analysis(name):
    """Merge every ``<name>*.hyperopt`` pickle and plot the combined trials.

    Each matching file must unpickle to a mapping with a ``"trials"`` entry.
    The trials object from the first file is used as the accumulator; for
    every further file, each list-valued instance attribute is concatenated
    onto the accumulator's attribute of the same name.  The merged object is
    then handed to ``main_plot_history``, ``main_plot_histogram`` and
    ``plot_vars``.

    :param name: filename prefix; files are looked up with ``glob`` using the
        pattern ``"<name>*.hyperopt"``.
    :raises FileNotFoundError: if no file matches the pattern, since there
        would be nothing to plot.
    """
    pattern = "{}*.hyperopt".format(name)
    paths = glob.glob(pattern)
    if not paths:
        raise FileNotFoundError(
            "no hyperopt result files match {!r}".format(pattern))

    merged = None
    for path in paths:
        # NOTE: unpickling can execute arbitrary code; only point this at
        # result files you trust.
        with open(path, "rb") as handle:
            opt_data = pickle.load(handle)
        loaded = opt_data["trials"]

        if merged is None:
            merged = loaded
        else:
            # Concatenate only the list-valued instance attributes; every
            # other attribute keeps the value from the first file.
            for attr, value in vars(loaded).items():
                if isinstance(value, list) and not attr.startswith("__"):
                    setattr(merged, attr, getattr(merged, attr) + value)

        # One blank line of output per processed file.
        print()

    main_plot_history(
        trials=merged,
        algo=tpe.suggest,
    )
    main_plot_histogram(
        trials=merged,
        algo=tpe.suggest,
    )
    plot_vars(merged)
Пример #2
0
                               "max_evals": 20
                           })
    res.get()

# %%

from hyperopt.plotting import main_plot_vars, main_plot_history, main_plot_histogram
import matplotlib.pylab as plt

# Plot the loss history for each (search space, trials) pair; only the
# LightGBM pair is listed here.
for sp, trls in zip([space_lgb], [trials_lgb]):
    domain = base.Domain(lgb_run, sp)
    # plt.figure(figsize=(20, 40))
    # main_plot_vars(trls, bandit=domain, colorize_best=30, columns=1)
    plt.figure(figsize=(20, 5))
    # plt.ylim((-0.003, 0.003))
    main_plot_history(trls, bandit=domain)

# NOTE: the performance recorded for trials_lgb should be the negative of the
# first metric on VA (presumably the validation set), at least in this example.

# Interactive inspection of the trials object; the bare expressions below are
# only useful when run cell by cell.
trials_lgb.trials  # a list of dicts, one per model, holding its parameters and results
trials_lgb.results  # the results of all experiments
trials_lgb.miscs  # the parameters of all experiments
trials_lgb.vals  # for all experiments, the parameters related to updating the space

trials_lgb.trials[0]['misc']['vals']
tmp1 = space_lgb['lgb_param']['bagging_fraction']
tmp2 = 2**sample_int("v_lambda_l1", 0, 2) - 1
5**-1.6  # 0.07614615754863513

type(tmp2), dir(tmp2)
                               'hyperopt_curve_fitting_trials_report.csv')
# Persist the per-trial summary table and tell the user where it went.
trials_summary.to_csv(trials_log_file)
print('-> trials summary report written here : ' + trials_log_file)

if finished_jobs > 0:
    # One subplot per summary column, plotted against the trial id.
    trials_summary.plot(subplots=True, x='tid')
    plt.legend(loc='best')
    plt.savefig(os.path.join(outputlog_folder, 'hparams_vs_trials'))
    # Draw the scatter matrix of the hyperparameters plus the loss.
    # NOTE(review): `trial` is not assigned in this section; presumably it is
    # left over from an earlier loop over the trials - confirm.
    scatter_matrix(trials_summary[list(trial['misc']['vals']) + ['loss']],
                   alpha=0.2,
                   figsize=(6, 6),
                   diagonal='kde')
    plt.savefig(os.path.join(outputlog_folder, 'scatter_matrix'))
    plt.figure()
    plotting.main_plot_history(trials)
    plt.savefig(os.path.join(outputlog_folder, 'hyperopt_history'))
    plt.figure()
    plotting.main_plot_histogram(trials)
    plt.savefig(os.path.join(outputlog_folder, 'hyperopt_histogram'))
    #plotting.main_plot_vars(trials, scope)#FIXME, check https://github.com/hyperopt/hyperopt/blob/master/hyperopt/tests/test_plotting.py
    try:
        plt.show()
    except Exception:
        # Showing the figures is best effort: every plot has already been
        # saved above. Catch Exception rather than everything so that
        # Ctrl-C and interpreter exit still propagate.
        print(
            'matplotlib.pyplot.show() could not be run, maybe this computer does not have a X server'
        )
else:
    print(
        '-> Could not run any trial correctly... look for errors, first try to start a single experiment manually'
Пример #4
0
def optimize_segmentation(pimg, rawdata, segparams, mypath_opt):
    """Tune segmentation parameters with hyperopt and save diagnostics.

    Runs ``ho.fmin`` with the TPE algorithm over ``segparams['space']``.  The
    objective segments every element of ``pimg``, picks the labelled slices
    and returns the negated mean ``ss.seg`` score, so minimising the
    objective maximises the score.

    The following files are written into ``mypath_opt``:
    ``trials.pkl``, ``<name>_<param0>_<param1>.png``,
    ``hypopt_histogram.pdf``, ``hypopt_history.pdf``, ``hypopt_vars.pdf`` and
    ``hypopt_seaborn_004.pdf``.

    :param pimg: iterable of images; each element is passed to the
        segmentation function together with the candidate parameters.
    :param rawdata: mapping providing ``'inds_labeled_slices'`` (a pair of
        index arrays into the stacked segmentations) and ``'gt_slices'``.
    :param segparams: mapping providing ``'function'``, ``'space'``,
        ``'info'`` (with keys ``'name'``, ``'param0'``, ``'param1'``) and
        ``'n_evals'``.
    :param mypath_opt: output directory; must support the ``/`` operator
        (e.g. ``pathlib.Path``).
    :return: the value returned by ``ho.fmin``.
    """
    inds = rawdata['inds_labeled_slices']
    ## TODO: recompute gt_slices! don't use gt_slices from rawdata
    gt_slices = rawdata['gt_slices']
    stack_segmentation_function = segparams['function']
    segmentation_space = segparams['space']
    segmentation_info = segparams['info']
    n_evals = segparams['n_evals']
    img_instseg = pimg  #[[0]]

    def risk(params):
        """Return the negated mean SEG score obtained with ``params``."""
        print('Evaluating params:', params)
        hyp = np.array(
            [stack_segmentation_function(x, params) for x in img_instseg])
        hyp = hyp.astype(np.uint16)
        # Keep only the slices for which ground truth is available.
        pred_slices = hyp[inds[0], inds[1]]
        res = np.array([ss.seg(x, y) for x, y in zip(gt_slices, pred_slices)])
        val = res.mean()
        print("SEG: ", val)
        # fmin minimises, so negate the score.
        return -val

    ## perform the optimization

    trials = ho.Trials()
    best = ho.fmin(risk,
                   space=segmentation_space,
                   algo=ho.tpe.suggest,
                   max_evals=n_evals,
                   trials=trials)

    # Close the file deterministically so the pickle is fully flushed.
    with open(mypath_opt / 'trials.pkl', 'wb') as trials_file:
        pickle.dump(trials, trials_file)
    print(best)

    losses = [x['loss'] for x in trials.results if x['status'] == 'ok']
    df = pd.DataFrame({**trials.vals})
    # Truncate to the number of successful trials before attaching the loss.
    # NOTE(review): this lines rows up with losses only if unsuccessful
    # trials, if any, come last - confirm.
    df = df.iloc[:len(losses)]
    df['loss'] = losses

    ## save the results

    def save_img():
        """Scatter the two parameters named in the info dict, coloured by loss."""
        plt.figure()
        n = segmentation_info['name']
        p0 = segmentation_info['param0']
        p1 = segmentation_info['param1']
        x = np.array(trials.vals[p0])
        y = np.array(trials.vals[p1])
        c = np.array([
            t['loss'] for t in trials.results
            if t.get('loss', None) is not None
        ])
        # Only as many points as there are recorded losses can be coloured.
        plt.scatter(x[:c.shape[0]], y[:c.shape[0]], c=c)
        plt.title(n)
        plt.xlabel(p0)
        plt.ylabel(p1)
        plt.colorbar()
        filename = '_'.join([n, p0, p1])
        plt.savefig(mypath_opt / (filename + '.png'))

    save_img()

    from hyperopt.plotting import main_plot_vars
    from hyperopt.plotting import main_plot_history
    from hyperopt.plotting import main_plot_histogram

    plt.figure()
    main_plot_histogram(trials=trials)
    plt.savefig(mypath_opt / 'hypopt_histogram.pdf')

    plt.figure()
    main_plot_history(trials=trials)
    plt.savefig(mypath_opt / 'hypopt_history.pdf')

    domain = ho.base.Domain(risk, segmentation_space)

    plt.figure()
    main_plot_vars(trials=trials, bandit=domain)
    plt.tight_layout()
    plt.savefig(mypath_opt / 'hypopt_vars.pdf')

    # Pairwise overview of all sampled parameters and the loss.
    plt.figure()
    g = sns.PairGrid(df)  #, hue="connectivity")
    g.map_diag(plt.hist)
    g.map_upper(plt.scatter)
    g.map_lower(sns.kdeplot, cmap="Blues_d")
    g.add_legend()
    plt.savefig(mypath_opt / 'hypopt_seaborn_004.pdf')

    return best
def plot_hyperopt(trials_file, time_file, res_file):
    """
    Load a trials file, plot all results and export summary tables.

    The hyperopt history, histogram and variable plots are drawn first.  Each
    trial is then decoded into a table of hyperparameter values (looked up in
    the module-level ``space`` by the index stored in the trial) plus an
    ``f1score`` column holding the negated loss.  That table is joined with
    the per-trial run times, written to ``res_file``, and the Pearson
    correlation of every remaining column with ``f1score`` and ``time`` is
    written to ``res_file + ".corr.csv"`` and plotted.

    :param trials_file: path of the trials.pkl file (loaded with joblib)
    :param time_file: path of a space-delimited, header-less file with three
        columns read as hours, minutes and seconds; they are combined into a
        single ``time`` column
    :param res_file: path of the CSV to write; must be a ``str`` because the
        correlation file name is built by string concatenation
    """
    trials = joblib.load(trials_file)
    main_plot_history(trials=trials)
    main_plot_histogram(trials=trials)
    main_plot_vars(trials=trials)

    # parse trials object
    result = {'f1score': []}
    # Walk configurations and results in lockstep; reading each property
    # once avoids re-evaluating `trials.results` for every trial.
    for configuration, outcome in zip(trials.miscs, trials.results):
        for hyperparam, chosen in configuration['vals'].items():
            # The first stored value is used as an index into the list of
            # candidates for this hyperparameter.
            idx = chosen[0]
            result.setdefault(hyperparam, []).append(space[hyperparam][idx])
        result['f1score'].append(-1 * outcome['loss'])
    print(result)
    # plot results
    plot_results(result)

    times = pd.read_csv(time_file, delimiter=' ', header=None)
    times.columns = ['h', 'm', 's']
    print(times)
    times['time'] = times['h'] + times['m'] / 60 + times['s'] / 60 / 60
    times = times.drop(['h', 'm', 's'], axis=1)
    print(times)
    results = pd.DataFrame(result)
    print(results)

    result = pd.concat([results, times], axis=1)
    print(result)
    result.to_csv(res_file, index=False)

    def tokenize_bb(s):
        """Map a backbone label to an integer code."""
        if s == "resnet101":
            return 0
        return 1 if s == "resnet50\nbatch size1" else 2

    def tokenize_opt(s):
        """Map an optimizer label to an integer code."""
        return 0 if s == "SGD" else 1

    # Replace the categorical columns by numeric codes so that they can take
    # part in the correlation below.
    tokenized_result = result
    tokenized_result['tok_backbone'] = tokenized_result['backbone'].apply(
        tokenize_bb)
    tokenized_result['tok_optimizer'] = tokenized_result['optimizer'].apply(
        tokenize_opt)
    tokenized_result = tokenized_result.drop(['backbone', 'optimizer'], axis=1)
    tokenized_result = tokenized_result.rename(
        columns={
            'tok_backbone': 'backbone',
            'tok_optimizer': 'optimizer',
            'detection_min_confidence': 'dmc',
            'train_rois_per_image': 'rois'
        })
    print(tokenized_result)
    correlation = tokenized_result.corr(method='pearson')
    # Keep only the correlation of each hyperparameter (rows) with the two
    # target columns.
    focus_cols = ['f1score', 'time']
    correlation = correlation.filter(focus_cols).drop(focus_cols)

    print(correlation)
    # NOTE(review): index=False leaves the row labels (hyperparameter names)
    # out of the file - confirm that this is intended.
    correlation.to_csv(res_file + ".corr.csv", index=False)

    plot_correlation(correlation)
Пример #6
0
#!/usr/bin/env python
import cPickle
import sys
from hyperopt.plotting import main_plot_history

# Load the pickled trials object given as the first command-line argument.
# Pickle data is binary, so open the file in 'rb' mode, and use a context
# manager so the handle is closed as soon as loading is done.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(sys.argv[1], 'rb') as trials_file:
    trials = cPickle.load(trials_file)

main_plot_history(trials)

Пример #7
0
# Module-level state for the optimisation run. These names are already
# globals here, so no `global` declaration is needed at this level.
BEST_LOSS = 100
ITERATION = 0
EPOCHS = 100
STEPS_PER_EPOCH = 50

# Optimize, find the smallest loss from fn, with space, using algo
# for MAX_EVALS steps, collate results in bayes_trials
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=MAX_EVALS,
            trials=bayes_trials)

#make some plots and save our trials object
#we can reload the trials object and keep training
#or just look at the pretty results

plotting.main_plot_history(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_history.png')

plotting.main_plot_histogram(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_histogram.png')

plotting.main_plot_vars(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_plot_vars.png')

# Use a context manager so the pickle is flushed and the handle closed.
with open('Data/Scratch/GalZoo2/bayes_Trials_database.p', 'wb') as trials_file:
    pickle.dump(bayes_trials, trials_file)