def plot_selected_hp(trace, hp_name_x, hp_name_y):
    """Scatter two hyperparameters of the weighted configurations.

    Every ``(hp_id, prob)`` pair of the last ``prob`` entry recorded in
    ``trace.db.salad`` is drawn as a blue ``+`` whose area is proportional
    to ``prob`` (the smallest probability maps to a size of 20).  The
    configuration chosen at the latest iteration recorded in
    ``trace.db.argmin`` is overlaid as a large green ``+``.

    Parameters
    ----------
    trace :
        Trace object exposing ``db.salad``, ``db.eval_info`` and
        ``db.argmin``, readable through ``get_column_list``.
    hp_name_x, hp_name_y :
        Keys of the hyperparameter dictionaries (column ``'hp_'`` of
        ``eval_info``) used for the x and y coordinates.
    """
    prob_dict, = get_column_list(trace.db.salad, 'prob')
    hp_id_list, hp_dict_list = get_column_list(
        trace.db.eval_info, 'hp_id', 'hp_')
    hp_map = dict(zip(hp_id_list, hp_dict_list))

    # Only the last recorded distribution is displayed.
    point_list = [
        (hp_map[hp_id][hp_name_x], hp_map[hp_id][hp_name_y], prob)
        for hp_id, prob in prob_dict[-1]
    ]
    x, y, prob = np.array(point_list).T
    pp.scatter(x, y, s=prob / min(prob) * 20, c='b', marker='+',
               linewidths=0.5)

    i, _, chosen = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')
    # Position of the record holding the largest iteration number.  Using
    # max(i) here would treat the iteration *value* as a list position,
    # which is only right when the records are stored as 0..n-1 in order.
    idx = np.argmax(i)
    hp = hp_map[chosen[idx]]
    pp.scatter(hp[hp_name_x], hp[hp_name_y], s=200, marker='+', c='g',
               linewidths=1)
def plot_time(trace, axes=None):
    """Draw a stacked bar chart of the time spent in each iteration.

    Three components are stacked bottom to top: ``'analyse_time'`` (red,
    from ``trace.db.analyze``), ``'choose.time'`` (blue, from
    ``trace.db.candidates``) and ``'time'`` (green, labelled "learn time",
    from ``trace.db.eval_info``).

    Parameters
    ----------
    trace :
        Trace object whose ``db`` tables are read with ``get_column_list``.
    axes :
        Axes to draw on; when ``None`` the current axes (``pp.gca()``) are
        used.
    """
    if axes is None:
        axes = pp.gca()

    hp_id, choose_time = get_column_list(
        trace.db.candidates, 'hp_id', 'choose.time')
    hp_id_analyse, analyse_time = get_column_list(
        trace.db.analyze, 'hp_id', 'analyse_time')
    hp_id_duration, duration = get_column_list(
        trace.db.eval_info, 'hp_id', 'time')

    # The tables may hold different numbers of rows; keep the common prefix.
    n_common = min(len(hp_id), len(hp_id_analyse), len(hp_id_duration))
    choose_time = choose_time[:n_common]
    analyse_time = analyse_time[:n_common]
    duration = duration[:n_common]

    # Position of each hp_id in the candidates table.
    hp_id_map = {key: position for position, key in enumerate(hp_id)}
    # NOTE(review): presumably remap reorders the values to follow the
    # candidates order and returns something supporting element-wise `+`
    # with choose_time -- confirm against its definition.
    analyse_time = remap(hp_id_map, hp_id_analyse, analyse_time)
    duration = remap(hp_id_map, hp_id_duration, duration)

    positions = np.arange(n_common)
    axes.bar(positions, analyse_time, color='r', label='analyse time')
    axes.bar(positions, choose_time, bottom=analyse_time, color='b',
             label='choose time')
    axes.bar(positions, duration, bottom=analyse_time + choose_time,
             color='g', label='learn time')

    axes.set_xlabel('iteration')
    axes.set_ylabel('time (s)')
    axes.legend(loc='best')
    axes.set_title('time per iteration for different components')
def plot_selected_hp_trace(trace):
    """Show, per iteration, the candidate weights and the chosen candidate.

    The ``prob`` entries of ``trace.db.salad`` are rendered as a grey-scale
    image with one row per ``hp_id`` of ``trace.db.eval_info`` and one
    column per ``prob`` entry.  For every record of ``trace.db.argmin`` the
    members of ``argmin_list`` are overlaid as small translucent blue dots
    and ``chosen_hp_id`` as a hollow red circle.
    """
    prob_dict, = get_column_list(trace.db.salad, 'prob')
    hp_id_list, = get_column_list(trace.db.eval_info, 'hp_id')

    # Row of the image assigned to each hp_id.
    row_of = {hp_id: row for row, hp_id in enumerate(hp_id_list)}

    prob_mat = np.zeros((len(hp_id_list), len(prob_dict)))
    for column, prob_list in enumerate(prob_dict):
        for hp_id, p in prob_list:
            prob_mat[row_of[hp_id], column] = p

    pp.imshow(prob_mat, origin='lower', aspect='auto', cmap='binary',
              interpolation='nearest')

    records = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')
    for iteration, argmin_list, chosen_hp_id in zip(*records):
        rows = np.array([row_of[hp_id] for hp_id in argmin_list])
        # plot all candidates
        pp.scatter([iteration] * len(rows), rows, 2, color='blue', alpha=0.2)
        # plot the chosen one
        pp.scatter(iteration, row_of[chosen_hp_id], 10, facecolors='none',
                   edgecolors='r')

    pp.xlabel('iteration')
    pp.ylabel('candidate')
    pp.title('The chosen candidate for each iteration')
def load_eval_info(trace_path):
    """
    Load from the trace at ``trace_path`` the predictions of every model
    on the test and the validation sets.

    Returns a pair ``(tst_eval_info, val_eval_info)`` of ``EvalInfo``
    objects.  Both share the metric, dataset name and hyperparameter-space
    name taken from the first record of ``trace.db.main``.
    """
    trace = pkl_trace.TraceDBFile(trace_path)

    model_tst, model_val = pkl_trace.get_column_list(
        trace.db.eval_info, "tst.y", "val.y")
    target_tst, target_val = pkl_trace.get_column_list(
        trace.db.y, "tst", "val")
    metric, ds_name, hp_space = pkl_trace.get_column_list(
        trace.db.main, "__dict__.metric", "ds_name", "hp_space")

    # Only the first record of the `main` and `y` tables is used.
    shared_args = (metric[0], ds_name[0], hp_space[0].name)
    tst_eval_info = EvalInfo(target_tst[0], model_tst, *shared_args)
    val_eval_info = EvalInfo(target_val[0], model_val, *shared_args)
    return tst_eval_info, val_eval_info
def sign_test_over_time(trace_list, key_A='salad_risk.tst',
                        key_B='argmin_risk.tst'):
    """Count, position by position, how often ``key_A`` beats ``key_B``.

    For each trace the columns ``key_A`` and ``key_B`` are read from
    ``trace.db.analyze`` and compared element-wise.  A position is a win
    when the ``key_A`` value is strictly smaller and a loss when it is
    strictly larger; ties are counted in neither.

    Parameters
    ----------
    trace_list :
        Iterable of trace objects exposing ``db.analyze``.
    key_A, key_B :
        Column names passed to ``get_column_list``.

    Returns
    -------
    (wins, total) : tuple of numpy arrays
        ``wins[k]`` is the number of traces with ``a[k] < b[k]`` and
        ``total[k]`` the number of traces with ``a[k] != b[k]``.  With an
        empty ``trace_list`` both are ``array([0.])``.
    """
    wins = np.zeros(1)
    lose = np.zeros(1)
    for trace in trace_list:
        a, b = get_column_list(trace.db.analyze, key_A, key_B)
        a = np.array(a)
        b = np.array(b)

        n = max(len(a), len(wins))
        if n > len(wins):
            # Longer trace than any seen so far: grow the counters in place
            # (ndarray.resize fills the new entries with zeros).
            # Single-argument print() emits the same text on Python 2 and 3.
            print('resizing %s' % (wins.shape,))
            wins.resize(n)
            lose.resize(n)
        elif n > len(a):
            # Shorter trace: zero-pad both columns, so the padded positions
            # compare equal and change neither counter.
            a.resize(n)
            b.resize(n)

        print('%s %s %s %s' % (wins.shape, lose.shape, a.shape, b.shape))
        wins[a < b] += 1.
        lose[a > b] += 1
    return wins, wins + lose
def sign_test_over_time(trace_list, key_A='salad_risk.tst',
                        key_B='argmin_risk.tst'):
    """Count, position by position, how often ``key_A`` beats ``key_B``.

    For each trace the columns ``key_A`` and ``key_B`` are read from
    ``trace.db.analyze`` and compared element-wise.  A position is a win
    when the ``key_A`` value is strictly smaller and a loss when it is
    strictly larger; ties are counted in neither.

    Parameters
    ----------
    trace_list :
        Iterable of trace objects exposing ``db.analyze``.
    key_A, key_B :
        Column names passed to ``get_column_list``.

    Returns
    -------
    (wins, total) : tuple of numpy arrays
        ``wins[k]`` is the number of traces with ``a[k] < b[k]`` and
        ``total[k]`` the number of traces with ``a[k] != b[k]``.  With an
        empty ``trace_list`` both are ``array([0.])``.
    """
    wins = np.zeros(1)
    lose = np.zeros(1)
    for trace in trace_list:
        a, b = get_column_list(trace.db.analyze, key_A, key_B)
        a = np.array(a)
        b = np.array(b)

        n = max(len(a), len(wins))
        if n > len(wins):
            # Longer trace than any seen so far: grow the counters in place
            # (ndarray.resize fills the new entries with zeros).
            # Single-argument print() emits the same text on Python 2 and 3.
            print('resizing %s' % (wins.shape,))
            wins.resize(n)
            lose.resize(n)
        elif n > len(a):
            # Shorter trace: zero-pad both columns, so the padded positions
            # compare equal and change neither counter.
            a.resize(n)
            b.resize(n)

        print('%s %s %s %s' % (wins.shape, lose.shape, a.shape, b.shape))
        wins[a < b] += 1.
        lose[a > b] += 1
    return wins, wins + lose
def load_eval_info(trace_path):
    """
    Load from the trace at ``trace_path`` the predictions of every model
    on the test and the validation sets.

    Returns a pair ``(tst_eval_info, val_eval_info)`` of ``EvalInfo``
    objects.  Both share the metric, dataset name and hyperparameter-space
    name taken from the first record of ``trace.db.main``.
    """
    trace = pkl_trace.TraceDBFile(trace_path)

    model_tst, model_val = pkl_trace.get_column_list(
        trace.db.eval_info, 'tst.y', 'val.y')
    target_tst, target_val = pkl_trace.get_column_list(
        trace.db.y, 'tst', 'val')
    metric, ds_name, hp_space = pkl_trace.get_column_list(
        trace.db.main, '__dict__.metric', 'ds_name', 'hp_space')

    # Only the first record of the `main` and `y` tables is used.
    shared_args = (metric[0], ds_name[0], hp_space[0].name)
    tst_eval_info = EvalInfo(target_tst[0], model_tst, *shared_args)
    val_eval_info = EvalInfo(target_val[0], model_val, *shared_args)
    return tst_eval_info, val_eval_info
def __init__(self, trace):
    """Collect the hyperparameter space and candidate grid of ``trace``.

    Attributes set:
      hp_space      -- first ``hp_space`` value of ``trace.db.main``
      trace         -- the trace itself
      hp_id_list    -- ``hp_id`` column of ``trace.db.candidates``
      chooser_state -- ``chooser_state`` of the candidate record with the
                       largest ``i``
      hp_id_map     -- ``make_map(hp_id_list)``
      unit_grid     -- ``unit_value`` column as a numpy array
      col_list      -- one numpy array per hyperparameter, holding its
                       value for each row of ``unit_grid``
      hp_keys       -- hyperparameter names, as reported for the last row
      hp_key_map    -- ``make_map(hp_keys)``
    """
    self.hp_space = get_column_list(trace.db.main, 'hp_space')[0][0]
    self.trace = trace

    columns = get_column_list(
        trace.db.candidates, 'hp_id', 'unit_value', 'chooser_state', 'i')
    self.hp_id_list, unit_list, chooser_state_list, iteration = columns

    latest = np.argmax(iteration)
    self.chooser_state = chooser_state_list[latest]
    self.hp_id_map = make_map(self.hp_id_list)
    self.unit_grid = np.array(unit_list)

    # Convert each row of the grid into a configuration and keep its values.
    value_rows = []
    for unit_value in self.unit_grid:
        configuration = HpConfiguration(self.hp_space, unit_value)
        hp_keys, values = zip(*configuration.var_list())
        value_rows.append(values)

    # Transpose: rows of values -> one column per hyperparameter.
    self.col_list = [np.array(column) for column in zip(*value_rows)]
    self.hp_keys = hp_keys
    self.hp_key_map = make_map(hp_keys)
def __init__(self, trace):
    """Collect the hyperparameter space and candidate grid of ``trace``.

    Attributes set:
      hp_space      -- first ``hp_space`` value of ``trace.db.main``
      trace         -- the trace itself
      hp_id_list    -- ``hp_id`` column of ``trace.db.candidates``
      chooser_state -- ``chooser_state`` of the candidate record with the
                       largest ``i``
      hp_id_map     -- ``make_map(hp_id_list)``
      unit_grid     -- ``unit_value`` column as a numpy array
      col_list      -- one numpy array per hyperparameter, holding its
                       value for each row of ``unit_grid``
      hp_keys       -- hyperparameter names, as reported for the last row
      hp_key_map    -- ``make_map(hp_keys)``
    """
    self.hp_space = get_column_list(trace.db.main, 'hp_space')[0][0]
    self.trace = trace

    columns = get_column_list(
        trace.db.candidates, 'hp_id', 'unit_value', 'chooser_state', 'i')
    self.hp_id_list, unit_list, chooser_state_list, iteration = columns

    latest = np.argmax(iteration)
    self.chooser_state = chooser_state_list[latest]
    self.hp_id_map = make_map(self.hp_id_list)
    self.unit_grid = np.array(unit_list)

    # Convert each row of the grid into a configuration and keep its values.
    value_rows = []
    for unit_value in self.unit_grid:
        configuration = HpConfiguration(self.hp_space, unit_value)
        hp_keys, values = zip(*configuration.var_list())
        value_rows.append(values)

    # Transpose: rows of values -> one column per hyperparameter.
    self.col_list = [np.array(column) for column in zip(*value_rows)]
    self.hp_keys = hp_keys
    self.hp_key_map = make_map(hp_keys)
def plot_selected_hp(trace, hp_name_x, hp_name_y):
    """Scatter two hyperparameters of the weighted configurations.

    Every ``(hp_id, prob)`` pair of the last ``prob`` entry recorded in
    ``trace.db.salad`` is drawn as a blue ``+`` whose area is proportional
    to ``prob`` (the smallest probability maps to a size of 20).  The
    configuration chosen at the latest iteration recorded in
    ``trace.db.argmin`` is overlaid as a large green ``+``.

    Parameters
    ----------
    trace :
        Trace object exposing ``db.salad``, ``db.eval_info`` and
        ``db.argmin``, readable through ``get_column_list``.
    hp_name_x, hp_name_y :
        Keys of the hyperparameter dictionaries (column ``'hp_'`` of
        ``eval_info``) used for the x and y coordinates.
    """
    prob_dict, = get_column_list(trace.db.salad, 'prob')
    hp_id_list, hp_dict_list = get_column_list(
        trace.db.eval_info, 'hp_id', 'hp_')
    hp_map = dict(zip(hp_id_list, hp_dict_list))

    # Only the last recorded distribution is displayed.
    point_list = [
        (hp_map[hp_id][hp_name_x], hp_map[hp_id][hp_name_y], prob)
        for hp_id, prob in prob_dict[-1]
    ]
    x, y, prob = np.array(point_list).T
    pp.scatter(x, y, s=prob / min(prob) * 20, c='b', marker='+',
               linewidths=0.5)

    i, _, chosen = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')
    # Position of the record holding the largest iteration number.  Using
    # max(i) here would treat the iteration *value* as a list position,
    # which is only right when the records are stored as 0..n-1 in order.
    idx = np.argmax(i)
    hp = hp_map[chosen[idx]]
    pp.scatter(hp[hp_name_x], hp[hp_name_y], s=200, marker='+', c='g',
               linewidths=1)
def plot_time(trace, axes=None):
    """Draw a stacked bar chart of the time spent in each iteration.

    Three components are stacked bottom to top: ``'analyse_time'`` (red,
    from ``trace.db.analyze``), ``'choose.time'`` (blue, from
    ``trace.db.candidates``) and ``'time'`` (green, labelled "learn time",
    from ``trace.db.eval_info``).

    Parameters
    ----------
    trace :
        Trace object whose ``db`` tables are read with ``get_column_list``.
    axes :
        Axes to draw on; when ``None`` the current axes (``pp.gca()``) are
        used.
    """
    if axes is None:
        axes = pp.gca()

    hp_id, choose_time = get_column_list(
        trace.db.candidates, 'hp_id', 'choose.time')
    hp_id_analyse, analyse_time = get_column_list(
        trace.db.analyze, 'hp_id', 'analyse_time')
    hp_id_duration, duration = get_column_list(
        trace.db.eval_info, 'hp_id', 'time')

    # The tables may hold different numbers of rows; keep the common prefix.
    n_common = min(len(hp_id), len(hp_id_analyse), len(hp_id_duration))
    choose_time = choose_time[:n_common]
    analyse_time = analyse_time[:n_common]
    duration = duration[:n_common]

    # Position of each hp_id in the candidates table.
    hp_id_map = {key: position for position, key in enumerate(hp_id)}
    # NOTE(review): presumably remap reorders the values to follow the
    # candidates order and returns something supporting element-wise `+`
    # with choose_time -- confirm against its definition.
    analyse_time = remap(hp_id_map, hp_id_analyse, analyse_time)
    duration = remap(hp_id_map, hp_id_duration, duration)

    positions = np.arange(n_common)
    axes.bar(positions, analyse_time, color='r', label='analyse time')
    axes.bar(positions, choose_time, bottom=analyse_time, color='b',
             label='choose time')
    axes.bar(positions, duration, bottom=analyse_time + choose_time,
             color='g', label='learn time')

    axes.set_xlabel('iteration')
    axes.set_ylabel('time (s)')
    axes.legend(loc='best')
    axes.set_title('time per iteration for different components')
def plot_selected_hp_trace(trace):
    """Show, per iteration, the candidate weights and the chosen candidate.

    The ``prob`` entries of ``trace.db.salad`` are rendered as a grey-scale
    image with one row per ``hp_id`` of ``trace.db.eval_info`` and one
    column per ``prob`` entry.  For every record of ``trace.db.argmin`` the
    members of ``argmin_list`` are overlaid as small translucent blue dots
    and ``chosen_hp_id`` as a hollow red circle.
    """
    prob_dict, = get_column_list(trace.db.salad, 'prob')
    hp_id_list, = get_column_list(trace.db.eval_info, 'hp_id')

    # Row of the image assigned to each hp_id.
    row_of = {hp_id: row for row, hp_id in enumerate(hp_id_list)}

    prob_mat = np.zeros((len(hp_id_list), len(prob_dict)))
    for column, prob_list in enumerate(prob_dict):
        for hp_id, p in prob_list:
            prob_mat[row_of[hp_id], column] = p

    pp.imshow(prob_mat, origin='lower', aspect='auto', cmap='binary',
              interpolation='nearest')

    records = get_column_list(
        trace.db.argmin, 'i', 'argmin_list', 'chosen_hp_id')
    for iteration, argmin_list, chosen_hp_id in zip(*records):
        rows = np.array([row_of[hp_id] for hp_id in argmin_list])
        # plot all candidates
        pp.scatter([iteration] * len(rows), rows, 2, color='blue', alpha=0.2)
        # plot the chosen one
        pp.scatter(iteration, row_of[chosen_hp_id], 10, facecolors='none',
                   edgecolors='r')

    pp.xlabel('iteration')
    pp.ylabel('candidate')
    pp.title('The chosen candidate for each iteration')
def plot_eval_info(plot, hp_info, y_keys, perm=None):
    """Hand the evaluated configurations and their metrics to ``plot``.

    The ``y_keys`` columns of ``hp_info.trace.db.eval_info`` are collected
    into a dictionary of numpy arrays, the most recent ``prob`` entry of
    ``hp_info.trace.db.predict`` is added under ``'AB probability'``, and
    the result is passed to ``plot.set_info`` together with the matching
    rows of ``hp_info.unit_grid`` and a ``MyGP`` configured from
    ``hp_info.chooser_state``.

    Parameters
    ----------
    plot :
        Object exposing ``set_info``.
    hp_info :
        Object exposing ``trace``, ``map_hp_id_list``, ``chooser_state``,
        ``unit_grid``, ``hp_keys`` and ``hp_space``.
    y_keys :
        Names of the ``eval_info`` columns to plot.
    perm :
        Optional sequence of column indices used to reorder the
        hyperparameters (columns of the grid and their names).

    Returns
    -------
    None.  When no configuration has been evaluated yet, a message is
    printed and ``plot`` is left untouched.
    """
    y_dict = get_column_dict(hp_info.trace.db.eval_info, 'hp_id', *y_keys)
    idx = hp_info.map_hp_id_list(y_dict.pop('hp_id'))

    # Bail out before touching the predict table: with no results yet it
    # may be empty as well, and np.argmax raises ValueError on an empty
    # sequence.
    if len(idx) == 0:
        print('no results yet')
        return

    # add the agnostic bayes distribution to the list of traces
    idx_list, distr_list = get_column_list(
        hp_info.trace.db.predict, 'i', 'prob')
    # extract the last computed distribution
    distr = distr_list[np.argmax(idx_list)]
    y_dict['AB probability'] = unpack_prob(distr, hp_info, len(idx))

    gp = MyGP(mcmc_iters=0, noiseless=False)
    gp.set_hypers(hp_info.chooser_state)

    for key in y_keys:
        y_dict[key] = np.array(y_dict[key])

    X = hp_info.unit_grid[idx, :]
    hp_keys = hp_info.hp_keys
    print(hp_keys)
    if perm is not None:
        X = X[:, perm]
        hp_keys = [hp_keys[i] for i in perm]

    hp_keys = [clean_hp_name(hp_key) for hp_key in hp_keys]
    print(hp_keys)
    plot.set_info(X, y_dict, 'val.risk', hp_keys,
                  hp_info.hp_space.var_list, gp)
def plot_eval_info(plot, hp_info, y_keys, perm=None):
    """Hand the evaluated configurations and their metrics to ``plot``.

    The ``y_keys`` columns of ``hp_info.trace.db.eval_info`` are collected
    into a dictionary of numpy arrays, the most recent ``prob`` entry of
    ``hp_info.trace.db.predict`` is added under ``'AB probability'``, and
    the result is passed to ``plot.set_info`` together with the matching
    rows of ``hp_info.unit_grid`` and a ``MyGP`` configured from
    ``hp_info.chooser_state``.

    Parameters
    ----------
    plot :
        Object exposing ``set_info``.
    hp_info :
        Object exposing ``trace``, ``map_hp_id_list``, ``chooser_state``,
        ``unit_grid``, ``hp_keys`` and ``hp_space``.
    y_keys :
        Names of the ``eval_info`` columns to plot.
    perm :
        Optional sequence of column indices used to reorder the
        hyperparameters (columns of the grid and their names).

    Returns
    -------
    None.  When no configuration has been evaluated yet, a message is
    printed and ``plot`` is left untouched.
    """
    y_dict = get_column_dict(hp_info.trace.db.eval_info, 'hp_id', *y_keys)
    idx = hp_info.map_hp_id_list(y_dict.pop('hp_id'))

    # Bail out before touching the predict table: with no results yet it
    # may be empty as well, and np.argmax raises ValueError on an empty
    # sequence.
    if len(idx) == 0:
        print('no results yet')
        return

    # add the agnostic bayes distribution to the list of traces
    idx_list, distr_list = get_column_list(
        hp_info.trace.db.predict, 'i', 'prob')
    # extract the last computed distribution
    distr = distr_list[np.argmax(idx_list)]
    y_dict['AB probability'] = unpack_prob(distr, hp_info, len(idx))

    gp = MyGP(mcmc_iters=0, noiseless=False)
    gp.set_hypers(hp_info.chooser_state)

    for key in y_keys:
        y_dict[key] = np.array(y_dict[key])

    X = hp_info.unit_grid[idx, :]
    hp_keys = hp_info.hp_keys
    print(hp_keys)
    if perm is not None:
        X = X[:, perm]
        hp_keys = [hp_keys[i] for i in perm]

    hp_keys = [clean_hp_name(hp_key) for hp_key in hp_keys]
    print(hp_keys)
    plot.set_info(X, y_dict, 'val.risk', hp_keys,
                  hp_info.hp_space.var_list, gp)