def analysis(name):
    """Merge all ``<name>*.hyperopt`` checkpoints and plot the combined trials.

    Every file matching ``"{}*.hyperopt".format(name)`` is unpickled and its
    ``"trials"`` entry is read.  The trials object of the first file found is
    used as the accumulator; for each further file, every list-valued
    instance attribute is appended to the accumulator's list of the same
    name.  Non-list attributes keep the values from the first file.

    The merged object is then passed to ``main_plot_history``,
    ``main_plot_histogram`` and ``plot_vars``.

    Args:
        name: path prefix of the checkpoint files to merge.

    Raises:
        ValueError: if no file matches the prefix, since there would be
            nothing to plot.
    """
    trials = None
    for path in glob.glob("{}*.hyperopt".format(name)):
        # NOTE: unpickling executes code embedded in the file; only load
        # checkpoints that come from a trusted source.
        with open(path, "rb") as handle:
            opt_data = pickle.load(handle)
        _trials = opt_data["trials"]

        if trials is None:
            trials = _trials
            continue

        # Concatenate the per-trial lists onto the accumulator.
        for attr, value in vars(_trials).items():
            if isinstance(value, list) and not attr.startswith("__"):
                setattr(trials, attr, getattr(trials, attr) + value)

    if trials is None:
        raise ValueError(
            "no checkpoint file matches '{}*.hyperopt'".format(name))

    print()
    main_plot_history(trials=trials, algo=tpe.suggest)
    main_plot_histogram(trials=trials, algo=tpe.suggest)
    plot_vars(trials)
"max_evals": 20 }) res.get() # %% from hyperopt.plotting import main_plot_vars, main_plot_history, main_plot_histogram import matplotlib.pylab as plt for sp, trls in zip([space_lgb], [trials_lgb]): domain = base.Domain(lgb_run, sp) # plt.figure(figsize=(20, 40)) # main_plot_vars(trls, bandit=domain, colorize_best=30, columns=1) plt.figure(figsize=(20, 5)) # plt.ylim((-0.003, 0.003)) main_plot_history(trls, bandit=domain) # NOTE: 对trials_lgb的性能的评判应该是VA的第一个metric指标的负数,至少在这个例子里是这样 trials_lgb.trials # 所有模型的参数和结果的字典组成的一个list trials_lgb.results # 返回所有实验的结果 trials_lgb.miscs # 返回所有实验的参数 trials_lgb.vals # 返回所有实验的跟space更新有关的参数 trials_lgb.trials[0]['misc']['vals'] tmp1 = space_lgb['lgb_param']['bagging_fraction'] tmp2 = 2**sample_int("v_lambda_l1", 0, 2) - 1 5**-1.6 # 0.07614615754863513 type(tmp2), dir(tmp2)
'hyperopt_curve_fitting_trials_report.csv') trials_summary.to_csv(trials_log_file) print('-> trials summary report written here : ' + trials_log_file) if finished_jobs > 0: trials_summary.plot(subplots=True, x='tid') plt.legend(loc='best') plt.savefig(os.path.join(outputlog_folder, 'hparams_vs_trials')) #draw the scatter matrix scatter_matrix(trials_summary[list(trial['misc']['vals']) + ['loss']], alpha=0.2, figsize=(6, 6), diagonal='kde') plt.savefig(os.path.join(outputlog_folder, 'scatter_matrix')) plt.figure() plotting.main_plot_history(trials) plt.savefig(os.path.join(outputlog_folder, 'hyperopt_history')) plt.figure() plotting.main_plot_histogram(trials) plt.savefig(os.path.join(outputlog_folder, 'hyperopt_histogram')) #plotting.main_plot_vars(trials, scope)#FIXME, check https://github.com/hyperopt/hyperopt/blob/master/hyperopt/tests/test_plotting.py try: plt.show() except: print( 'matplotlib.pyplot.show() could not be run, maybe this computer does not have a X server' ) pass else: print( '-> Could not run any trial correctly... look for errors, first try to start a single experiment manually'
def _save_param_scatter(trials, segmentation_info, out_dir):
    """Scatter the two parameters named in *segmentation_info*, coloured by loss."""
    title = segmentation_info['name']
    p0 = segmentation_info['param0']
    p1 = segmentation_info['param1']
    x = np.array(trials.vals[p0])
    y = np.array(trials.vals[p1])
    c = np.array(
        [t['loss'] for t in trials.results if t.get('loss') is not None])

    plt.figure()
    # Only as many points as there are recorded losses can be coloured.
    plt.scatter(x[:c.shape[0]], y[:c.shape[0]], c=c)
    plt.title(title)
    plt.xlabel(p0)
    plt.ylabel(p1)
    plt.colorbar()
    plt.savefig(out_dir / ('_'.join([title, p0, p1]) + '.png'))


def _save_hyperopt_figures(trials, domain, out_dir):
    """Save hyperopt's built-in histogram, history and per-variable plots."""
    from hyperopt.plotting import main_plot_histogram
    from hyperopt.plotting import main_plot_history
    from hyperopt.plotting import main_plot_vars

    plt.figure()
    main_plot_histogram(trials=trials)
    plt.savefig(out_dir / 'hypopt_histogram.pdf')

    plt.figure()
    main_plot_history(trials=trials)
    plt.savefig(out_dir / 'hypopt_history.pdf')

    plt.figure()
    main_plot_vars(trials=trials, bandit=domain)
    plt.tight_layout()
    plt.savefig(out_dir / 'hypopt_vars.pdf')


def _save_seaborn_figures(df, out_dir):
    """Save the pair grid and the four-panel loss overview built from *df*.

    The overview reads the columns ``connectivity``, ``compactness``,
    ``nuc_mask``, ``nuc_seed`` and ``loss``, so *df* must contain them.
    """
    plt.figure()
    grid = sns.PairGrid(df)
    grid.map_diag(plt.hist)
    grid.map_upper(plt.scatter)
    grid.map_lower(sns.kdeplot, cmap="Blues_d")
    grid.add_legend()
    plt.savefig(out_dir / 'hypopt_seaborn_004.pdf')

    plt.figure(figsize=(16, 4))
    gspec = matplotlib.gridspec.GridSpec(1, 4)
    palette = np.array(sns.color_palette('deep'))
    # `.values` instead of the removed `as_matrix()`; the connectivity value
    # is used as an index into the palette, one RGB triple per row.
    conn = df.connectivity.values
    colors = palette[conn.flat].reshape(conn.shape + (3, ))

    ax0 = plt.subplot(gspec[0])
    ax0.scatter(df["compactness"], df.loss, c=colors)
    plt.ylabel('loss')

    for cell, column in ((1, "nuc_mask"), (2, "nuc_seed")):
        ax = plt.subplot(gspec[cell], sharey=ax0)
        ax.scatter(df[column], df.loss, c=colors)
        plt.setp(ax.get_yticklabels(), visible=False)

    ax3 = plt.subplot(gspec[3])
    sns.swarmplot(x="connectivity", y="loss", data=df, ax=ax3)
    plt.setp(ax3.get_yticklabels(), visible=False)
    plt.savefig(out_dir / 'hypopt_seaborn_006.pdf')


def optimize_segmentation(pimg, rawdata, segparams, mypath_opt):
    """Search segmentation parameters with hyperopt and save diagnostics.

    Args:
        pimg: iterable of images; each element is passed to
            ``segparams['function']`` together with the sampled parameters.
        rawdata: mapping providing ``'inds_labeled_slices'`` (its first two
            entries index the predicted stack) and ``'gt_slices'``.
        segparams: mapping with the keys ``'function'``, ``'space'``,
            ``'info'`` (itself holding ``'name'``, ``'param0'``,
            ``'param1'``) and ``'n_evals'``.
        mypath_opt: output directory; it is combined with file names via
            ``/``, so presumably a ``pathlib.Path`` -- confirm with callers.

    Returns:
        The best parameter assignment as returned by ``ho.fmin``.

    Side effects:
        Writes ``trials.pkl`` plus several ``.png``/``.pdf`` figures into
        ``mypath_opt`` and prints progress to stdout.
    """
    inds = rawdata['inds_labeled_slices']
    # TODO(review): the original note here says gt_slices should be
    # recomputed rather than taken from rawdata.
    gt_slices = rawdata['gt_slices']
    stack_segmentation_function = segparams['function']
    segmentation_space = segparams['space']
    segmentation_info = segparams['info']
    n_evals = segparams['n_evals']
    img_instseg = pimg

    def risk(params):
        """Return the negated mean ``ss.seg`` score over the labelled slices."""
        print('Evaluating params:', params)
        hyp = np.array(
            [stack_segmentation_function(x, params) for x in img_instseg])
        hyp = hyp.astype(np.uint16)
        pred_slices = hyp[inds[0], inds[1]]
        scores = np.array(
            [ss.seg(gt, pred) for gt, pred in zip(gt_slices, pred_slices)])
        val = scores.mean()
        print("SEG: ", val)
        # fmin minimises, so the score is negated.
        return -val

    # Perform the optimization.
    trials = ho.Trials()
    best = ho.fmin(risk,
                   space=segmentation_space,
                   algo=ho.tpe.suggest,
                   max_evals=n_evals,
                   trials=trials)
    with open(mypath_opt / 'trials.pkl', 'wb') as handle:
        pickle.dump(trials, handle)
    print(best)

    # One row per trial; truncated to the number of successful losses.
    losses = [r['loss'] for r in trials.results if r['status'] == 'ok']
    df = pd.DataFrame({**trials.vals})
    df = df.iloc[:len(losses)].copy()
    df['loss'] = losses

    # Save the results.
    _save_param_scatter(trials, segmentation_info, mypath_opt)
    domain = ho.base.Domain(risk, segmentation_space)
    _save_hyperopt_figures(trials, domain, mypath_opt)
    _save_seaborn_figures(df, mypath_opt)

    return best
def plot_hyperopt(trials_file, time_file, res_file):
    """
    load trials file, plot all results and export a results/correlation table

    :param trials_file: path of the trials.pkl file (read with joblib.load)
    :param time_file: space-delimited file without header holding three
        columns (hours, minutes, seconds); presumably one row per trial
    :param res_file: path of the CSV file to write; the correlation table is
        written next to it as ``res_file + ".corr.csv"``
    """
    trials = joblib.load(trials_file)
    main_plot_history(trials=trials)
    main_plot_histogram(trials=trials)
    main_plot_vars(trials=trials)

    # parse trials object: map every sampled index back to its value in the
    # module-level ``space`` and store the negated loss as the f1 score
    result = {'f1score': []}
    for configuration, outcome in zip(trials.miscs, trials.results):
        for hyperparam, sampled in configuration['vals'].items():
            idx = sampled[0]
            result.setdefault(hyperparam, []).append(space[hyperparam][idx])
        result['f1score'].append(-1 * outcome['loss'])
    print(result)

    # plot results
    plot_results(result)

    # convert the h/m/s columns into a single duration expressed in hours
    times = pd.read_csv(time_file, delimiter=' ', header=None)
    times.columns = ['h', 'm', 's']
    print(times)
    times['time'] = times['h'] + times['m'] / 60 + times['s'] / 60 / 60
    times = times.drop(['h', 'm', 's'], axis=1)
    print(times)

    results = pd.DataFrame(result)
    print(results)
    result = pd.concat([results, times], axis=1)
    print(result)
    result.to_csv(res_file, index=False)

    def tokenize_bb(s):
        """Encode the backbone label as 0, 1 or 2."""
        if s == "resnet101":
            return 0
        return 1 if s == "resnet50\nbatch size1" else 2

    def tokenize_opt(s):
        """Encode the optimizer label as 0 for SGD and 1 otherwise."""
        return 0 if s == "SGD" else 1

    # replace the categorical columns by numeric codes so that they take
    # part in the correlation
    tokenized_result = result
    tokenized_result['tok_backbone'] = tokenized_result['backbone'].apply(
        tokenize_bb)
    tokenized_result['tok_optimizer'] = tokenized_result['optimizer'].apply(
        tokenize_opt)
    tokenized_result = tokenized_result.drop(['backbone', 'optimizer'],
                                             axis=1)
    tokenized_result = tokenized_result.rename(
        columns={
            'tok_backbone': 'backbone',
            'tok_optimizer': 'optimizer',
            'detection_min_confidence': 'dmc',
            'train_rois_per_image': 'rois'
        })
    print(tokenized_result)

    # keep only how each hyperparameter correlates with f1score and time
    correlation = tokenized_result.corr(method='pearson')
    focus_cols = ['f1score', 'time']
    correlation = correlation.filter(focus_cols).drop(focus_cols)
    print(correlation)
    # NOTE(review): index=False drops the row labels (the hyperparameter
    # names) from the written file -- confirm this is intended.
    correlation.to_csv(res_file + ".corr.csv", index=False)
    plot_correlation(correlation)
#!/usr/bin/env python
"""Plot the loss history of a pickled hyperopt trials object.

Usage: pass the path of the pickle file as the first command-line argument.
"""
import sys

try:
    import cPickle as pickle  # Python 2
except ImportError:
    import pickle  # Python 3 has no cPickle module

from hyperopt.plotting import main_plot_history

if len(sys.argv) < 2:
    sys.exit("usage: {} TRIALS_PICKLE".format(sys.argv[0]))

# NOTE: unpickling executes code embedded in the file; only open pickles
# that come from a trusted source.
# Pickles are binary data, so the file is opened in "rb" mode and closed
# as soon as it has been read.
with open(sys.argv[1], "rb") as handle:
    trials = pickle.load(handle)

main_plot_history(trials)
global ITERATION, BEST_LOSS, EPOCHS, STEPS_PER_EPOCH
# Reset the shared search state before the optimization starts
# (presumably read by `objective` -- confirm against its definition).
BEST_LOSS = 100
ITERATION = 0
EPOCHS = 100
STEPS_PER_EPOCH = 50

# Optimize: find the smallest loss from fn over space, using algo for
# MAX_EVALS steps, and collect the results in bayes_trials.
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=MAX_EVALS,
            trials=bayes_trials)

# Make some plots and save the trials object, so that it can be reloaded
# later to keep training or just to inspect the results.
# NOTE(review): the plotting helpers may call plt.show() themselves; confirm
# the saved figures are not empty with the backend in use.
plotting.main_plot_history(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_history.png')
plotting.main_plot_histogram(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_histogram.png')
plotting.main_plot_vars(bayes_trials)
plt.savefig('Data/Scratch/GalZoo2/bayes_trials_main_plot_vars.png')

# The context manager guarantees the pickle is flushed and closed.
with open('Data/Scratch/GalZoo2/bayes_Trials_database.p', 'wb') as trials_file:
    pickle.dump(bayes_trials, trials_file)