def baseline(ax, exp_path, values, bounds):
    """Draw a dotted horizontal line at the best performance of one agent.

    The experiment at ``exp_path`` is loaded, its results are restricted to
    fixed ``reg_h`` / ``ratio`` settings depending on the agent name, and the
    best remaining configuration is reduced to a single number using the
    module-level ``bestBy`` setting.

    Args:
        ax: axes object exposing ``hlines``.
        exp_path: path of the experiment description to load.
        values: x-values of the plot; the line spans ``min(values)`` to
            ``max(values)``.
        bounds: list that receives ``(m, m)`` for the plotted level.

    Raises:
        ValueError: if the module-level ``bestBy`` is neither ``'end'`` nor
            ``'auc'``.
    """
    # The original read an undefined/global `path` here and ignored the
    # `exp_path` argument entirely.
    exp = loadExperiment(exp_path)
    results = loadResults(exp, errorfile)

    agent = exp.agent
    if 'Regh' in agent:
        results = whereParameterEquals(results, 'reg_h', 0.8)
        results = whereParameterEquals(results, 'ratio', 1)
    elif 'TDC' in agent or 'GTD2' in agent:
        results = whereParameterEquals(results, 'ratio', 1)

    if bestBy == 'end':
        def metric(m):
            # Mean over the trailing 10% of the curve.
            # NOTE(review): for fewer than 10 points the slice start is -0,
            # so the whole curve is averaged - confirm this is acceptable.
            return np.mean(m[-int(m.shape[0] * .1):])

        best = getBestEnd(results)
    elif bestBy == 'auc':
        metric = np.mean
        best = getBest(results)
    else:
        # Previously this fell through and failed later with an unbound
        # local; fail with an explicit message instead.
        raise ValueError(f"unsupported bestBy: {bestBy!r} (expected 'end' or 'auc')")

    m = metric(best.mean())
    ax.hlines(m, min(values), max(values), color=colors[agent], label=agent,
              linewidth=4, linestyle=':')
    bounds.append((m, m))
def generatePlotTTA(ax, exp_path, bounds):
    """Plot the parameter-sensitivity curve(s) of one experiment.

    The loaded results are duplicated with ``tee``: one copy is filtered by
    agent-specific ``ratio`` / ``reg_h`` constraints, the other is left
    unfiltered. The unfiltered copy is drawn dashed (label suffix ``_unc``)
    only when the module-level ``show_unconst`` is truthy. Whatever
    ``plotSensitivity`` returns is appended to ``bounds`` for every curve.
    """
    exp = loadExperiment(exp_path)
    const, unconst = tee(loadResults(exp, errorfile))

    agent = exp.agent
    color = colors[agent]
    label = rename(agent)

    # Order matters: 'ReghTDC' and 'TDRCC' must be tested before the more
    # general 'TDC' substring.
    if 'ReghTDC' in agent:
        const = whereParameterEquals(const, 'ratio', 1)
        const = whereParameterEquals(const, 'reg_h', 1)
    elif 'TDRCC' in agent:
        const = whereParameterEquals(const, 'ratio', 1)
        const = whereParameterGreaterEq(const, 'reg_h', 0.01)
    elif 'TDC' in agent:
        const = whereParameterGreaterEq(const, 'ratio', 1)

    # (results, label, extra keyword arguments) for each curve, in draw order.
    curves = []
    if show_unconst:
        curves.append((unconst, label + '_unc', {'dashed': True}))
    curves.append((const, label, {}))

    for subset, curve_label, extra in curves:
        b = plotSensitivity(subset, param, ax, stderr=stderr, color=color,
                            label=curve_label, bestBy=bestBy, **extra)
        bounds.append(b)
def generatePlotTTA(ax, exp_path, bounds):
    """Plot the best learning curve(s) of one experiment.

    Results are duplicated with ``tee``. One copy is restricted to
    ``ratio >= 1`` (and ``reg_h == 0.8`` for agents whose name contains
    'ReghTDC'); the other is unrestricted. The best configuration of each
    copy is selected with ``getBest``. The unrestricted best is drawn dashed
    (label suffix ``_unc``) only when the module-level ``show_unconst`` is
    truthy and it differs from the restricted best. Each ``plotBest`` return
    value is appended to ``bounds``.
    """
    exp = loadExperiment(exp_path)
    restricted, unrestricted = tee(loadResults(exp, errorfile))

    label = exp.agent
    color = colors[label]

    restricted = whereParameterGreaterEq(restricted, 'ratio', 1)
    if 'ReghTDC' in label:
        restricted = whereParameterEquals(restricted, 'reg_h', 0.8)

    best_const = getBest(restricted, bestBy=bestBy)
    best_unconst = getBest(unrestricted, bestBy=bestBy)

    draw_unconstrained = show_unconst and best_const != best_unconst
    if draw_unconstrained:
        bounds.append(plotBest(best_unconst, ax, window=window,
                               smoothing=smoothing, label=label + '_unc',
                               color=color, alpha=0.2, dashed=True))

    bounds.append(plotBest(best_const, ax, window=window, smoothing=smoothing,
                           label=label, color=color, alpha=0.2, dashed=False))
def generatePlot(ax, exp_paths, bounds):
    """Plot every retained configuration of each experiment, emphasising the best.

    For each experiment the results are filtered (batch size 4, ratio <= 8,
    alpha <= 0.5, ratio * alpha <= 1, plus agent-specific ``reg_h`` / ``ratio``
    constraints). Every surviving configuration is drawn faintly, except the
    one whose mean-curve average equals that of the ``getBest`` result, which
    is drawn fully opaque. The first point of the best curve is appended to
    ``bounds``.
    """
    def within_stepsize_budget(r):
        # A missing 'ratio' parameter is treated as 1.
        return r.params.get('ratio', 1) * r.params['alpha'] <= 1

    for exp_path in exp_paths:
        exp = loadExperiment(exp_path)
        results = loadResults(exp, errorfile)
        color = colors[exp.agent]
        label = exp.agent

        results = whereParameterEquals(results, 'batch_size', 4)
        results = whereParameterLesserEq(results, 'ratio', 8)
        results = whereParameterLesserEq(results, 'alpha', 0.5)
        results = where(results, within_stepsize_budget)

        # 'ReghTDC' and 'TDRCC' are tested before the broader 'TDC' substring.
        if 'ReghTDC' in label or 'TDRCC' in label:
            reg_h = 1 if 'ReghTDC' in label else 0.8
            results = whereParameterEquals(results, 'reg_h', reg_h)
            results = whereParameterEquals(results, 'ratio', 1)
        elif 'TDC' in label:
            results = whereParameterGreaterEq(results, 'ratio', 1)

        to_plot, to_rank = tee(results)
        best_line = getBest(to_rank).mean()
        best = np.mean(best_line)

        for result in to_plot:
            # Exact float equality is used to recognise the best
            # configuration among the plotted ones.
            is_best = np.mean(result.mean()) == best
            plotBest(result, ax, label=label, color=color,
                     alphaMain=1 if is_best else 0.12, dashed=False)

        # NOTE(review): the collapsed source does not show whether this
        # append sits inside the per-result loop; placed once per experiment.
        bounds.append(best_line[0])
def getResultsAndBest(exps):
    """Collect the best result of each experiment and the best one overall.

    For every experiment the results are restricted to the module-level
    ``LAMBDA`` value of the ``'lambda'`` parameter and reduced with
    ``getBest``. The overall winner is the per-experiment best with the
    smallest average of its mean curve; ties keep the earliest experiment.

    Args:
        exps: iterable of experiment objects accepted by ``loadResults``.

    Returns:
        ``(all_results, best_result)`` where ``all_results`` holds one entry
        per experiment in input order and ``best_result`` is ``None`` when
        ``exps`` is empty.
    """
    all_results = []
    best_result = None
    best_score = None

    for exp in exps:
        results = loadResults(exp)
        results = whereParameterEquals(results, 'lambda', LAMBDA)
        best = getBest(results)
        all_results.append(best)

        # Cache the incumbent's score rather than recomputing its mean curve
        # on every iteration.
        score = np.mean(best.mean())
        if best_result is None or score < best_score:
            best_result, best_score = best, score

    return all_results, best_result
def generatePlotTTA(ax, exp_paths, bounds):
    """Plot the step-size summary of the best configuration per experiment.

    Paths containing 'amsgrad' are skipped. For each remaining experiment the
    error results are restricted to ``ratio == 1``, the best one is chosen
    with ``getBest``, and the matching entry of the 'stepsize_summary.npy'
    results is located with ``find`` and drawn with two labels
    (``<agent>_w`` solid, ``<agent>_h`` dashed). Each ``plotBest`` return
    value is appended to ``bounds``.
    """
    wanted_paths = (p for p in exp_paths if 'amsgrad' not in p)
    for exp_path in wanted_paths:
        exp = loadExperiment(exp_path)
        errors = loadResults(exp, errorfile)
        stepsizes = loadResults(exp, 'stepsize_summary.npy')
        errors = whereParameterEquals(errors, 'ratio', 1)

        agent = exp.agent
        color = colors[agent]

        best_stepsize = find(stepsizes, getBest(errors))
        bounds.append(plotBest(best_stepsize, ax,
                               label=[agent + '_w', agent + '_h'],
                               color=color, dashed=[False, True]))
def generatePlotTTA(ax, exp_paths, bestBy, bounds):
    """Plot sensitivity to ``ratio`` for each experiment on a log2 x-axis.

    Results are restricted to ``reg_h == 6.4``. Agents whose name contains
    'SmoothTDC' get their ``averageType`` meta-parameter appended to the name
    used for both the colour lookup and the legend label.

    Args:
        ax: axes object to draw on.
        exp_paths: iterable of experiment description paths.
        bestBy: selection criterion forwarded to ``plotSensitivity``.
        bounds: list that receives the return value of each
            ``plotSensitivity`` call.
    """
    # `basex` was deprecated in Matplotlib 3.3 and removed in 3.5 in favour
    # of `base`. Try the current keyword first and fall back for releases
    # that only understand the legacy one.
    try:
        ax.set_xscale("log", base=2)
    except (TypeError, ValueError):
        ax.set_xscale("log", basex=2)

    for exp_path in exp_paths:
        exp = loadExperiment(exp_path)
        results = loadResults(exp, errorfile)
        results = whereParameterEquals(results, 'reg_h', 6.4)

        agent = exp.agent
        if 'SmoothTDC' in agent:
            agent += '_' + exp._d['metaParameters']['averageType']

        b = plotSensitivity(results, 'ratio', ax, color=colors[agent],
                            label=agent, bestBy=bestBy)
        bounds.append(b)
if alg == 'htd' and problem in on_policy_problems: curves[i, j, k] = curves[i, td_idx, k] continue if ss == 'constant': exp_paths = glob.glob( f'experiments/stepsizes/{problem}/{alg}/{alg}.json') else: exp_paths = glob.glob( f'experiments/stepsizes/{problem}/{alg}/{alg}{ss}.json' ) exp = loadExperiment(exp_paths[0]) results = loadResults(exp, errorfile) results = whereParameterEquals(results, 'reg_h', 0.8) lc, results = tee(results) best = getBest(lc) x, y, e = getSensitivityData(results, 'ratio', reducer='slice', bestBy='auc') curve = best.mean() curves[i, j, k, :, 0] = np.array(y) / curve[0] curves[i, j, k, :, 1] = np.array(e) * np.sqrt(best.runs()) if j == 0: total_runs += best.runs()
# table[i, j] holds [mean, stderr] of the best configuration of algorithm i
# on problem j; entries stay zero when the experiment cannot be loaded.
table = np.zeros((len(algorithms), len(problems), 2))

# Reduce a curve to one number by averaging it. (An end-of-run variant based
# on getBestEnd previously sat here as commented-out code.)
metric = np.mean

for i, alg in enumerate(algorithms):
    for j, problem in enumerate(problems):
        exp_path = f'experiments/stepsizes/{problem}/{alg}/{alg}{stepsize}.json'
        try:
            exp = loadExperiment(exp_path)
        except Exception:
            # Best-effort: skip combinations that cannot be loaded. Narrowed
            # from a bare `except:` so KeyboardInterrupt/SystemExit are no
            # longer swallowed.
            continue

        results = loadResults(exp, errorfile)

        if alg == 'td' or alg == 'vtrace':
            const = results
        else:
            const = whereParameterGreaterEq(results, 'ratio', 1)
            # NOTE(review): the collapsed source does not show whether the
            # reg_h filter belongs to this branch or applies to every
            # algorithm; kept inside the branch - confirm.
            const = whereParameterEquals(const, 'reg_h', 0.8)

        best = getBest(const)
        mean = metric(best.mean())
        stderr = metric(best.stderr())
        table[i, j] = [mean, stderr]

htd_idx = indexOf(algorithms, 'htd')
vtrace_idx = indexOf(algorithms, 'vtrace')
td_idx = indexOf(algorithms, 'td')
# table[i, j] holds [mean, stderr] of the best configuration of algorithm i
# on problem j; entries stay zero when the experiment cannot be loaded.
table = np.zeros((len(algorithms), len(problems), 2))

# Reduce a curve to one number by averaging it. (An end-of-run variant based
# on getBestEnd previously sat here as commented-out code.)
metric = np.mean

for i, alg in enumerate(algorithms):
    for j, problem in enumerate(problems):
        print(alg, problem)
        exp_path = f'experiments/stepsizes/{problem}/{alg}/{alg}{stepsize}.json'
        try:
            exp = loadExperiment(exp_path)
        except Exception:
            # Best-effort: skip combinations that cannot be loaded. Narrowed
            # from a bare `except:` so KeyboardInterrupt/SystemExit are no
            # longer swallowed.
            continue

        results = loadResults(exp, errorfile)

        if alg == 'td' or alg == 'vtrace':
            const = results
        else:
            const = whereParameterEquals(results, 'ratio', 1)
            # NOTE(review): the collapsed source does not show whether the
            # reg_h filter belongs to this branch or applies to every
            # algorithm; kept inside the branch - confirm.
            const = whereParameterEquals(const, 'reg_h', 0.8)

        best = getBest(const)
        mean = metric(best.mean())
        stderr = metric(best.stderr())
        table[i, j] = [mean, stderr]

htd_idx = indexOf(algorithms, 'htd')
vtrace_idx = indexOf(algorithms, 'vtrace')
# Compare TD against regh_tdc across reward scales for one `problem`.
# NOTE(review): this snippet appears to be cut off inside the inner loop;
# `mat`, `xs`, `ys`, `alphas`, `best_td_mean`, `best_td_std` and `mean` are
# not used within the visible part.

# The swept values are read from each experiment's meta-parameter sweep.
td_exp = loadExperiment(f'experiments/reward_scale/{problem}/td/td.json')
scales = td_exp._d['metaParameters']['reward_scale']
tdrc_exp = loadExperiment(
    f'experiments/reward_scale/{problem}/regh_tdc/regh_tdc.json')
betas = tdrc_exp._d['metaParameters']['reg_h']
# One cell per (reward scale, reg_h value) pair.
mat = np.zeros((len(scales), len(betas)))
xs = []
ys = []
alphas = []
for i, scale in enumerate(scales):
    # Results are reloaded on every iteration.
    # NOTE(review): presumably because loadResults yields a one-shot
    # iterator - confirm before hoisting.
    td_results = loadResults(td_exp, errorfile)
    td_results = whereParameterEquals(td_results, 'reward_scale', scale)
    best_td = getBest(td_results)
    best_td_mean = np.mean(best_td.mean())
    # NOTE(review): stderr * sqrt(runs) already looks like a standard
    # deviation (that form is used elsewhere in this code); the extra outer
    # np.sqrt looks unintended - confirm.
    best_td_std = np.mean(
        np.sqrt(best_td.stderr() * np.sqrt(best_td.runs())))
    tdrc_results = loadResults(tdrc_exp, errorfile)
    tdrc_results = whereParameterEquals(tdrc_results, 'reward_scale', scale)
    # Group the remaining results by their reg_h value.
    tdrc_split = splitOverParameter(tdrc_results, 'reg_h')
    for j, beta in enumerate(betas):
        results = tdrc_split[beta]
        best_tdrc = getBest(results)
        mean = np.mean(best_tdrc.mean())
def generatePlot(ax, exp_paths, ss, problem):
    """Scatter the per-configuration performance of every algorithm.

    Each experiment occupies its own lane on the x-axis, with one jittered
    marker per configuration (restricted to ``reg_h == 0.8``). A
    configuration counts as diverged when its mean curve is a scalar or ends
    higher than it starts; diverged points are drawn at
    ``global_max * DIVERGENCE_MULTIPLIER``, where ``global_max`` is the
    largest non-diverged performance over all algorithms. One summary line
    per algorithm is written to the module-level ``diverged_file``.

    Args:
        ax: axes object to draw on.
        exp_paths: iterable of experiment description paths; paths that
            fail to load are skipped.
        ss: step-size label, used only in the divergence log line.
        problem: problem label, used only in the divergence log line.
    """
    # load results
    all_performance = {}
    for exp_path in exp_paths:
        try:
            exp = loadExperiment(exp_path)
        except Exception:
            # Best-effort skip of unloadable experiments. Narrowed from a
            # bare `except:` so KeyboardInterrupt/SystemExit still propagate.
            continue

        results = loadResults(exp, errorfile)
        results = whereParameterEquals(results, 'reg_h', 0.8)

        color = colors[exp.agent]
        label = exp.agent

        performance = []
        for r in results:
            curve = r.mean()
            if np.isscalar(curve):
                # diverged if result doesn't exist
                m = np.nan
            elif curve[0] < curve[-1]:
                # diverged if the end of the curve is higher than the start
                m = np.nan
            else:
                m = np.mean(curve)
            performance.append(m)

        all_performance[label] = {'res': np.array(performance), 'color': color}

    # find max among all algorithms (NaN maxima never win the comparison)
    global_max = -np.inf
    for data in all_performance.values():
        local_max = np.nanmax(data['res'])
        if local_max > global_max:
            global_max = local_max

    # plot results
    x_offset = 0.5
    x_ticks = []
    x_labels = []
    for label, data in all_performance.items():
        performance = data['res']
        color = data['color']

        diverged = np.isnan(performance)
        num_diverged = diverged.sum()
        num_total = performance.shape[0]
        diverged_file.write(
            f'ss: {ss}; problem: {problem}; alg: {label}; perc: {num_diverged / num_total}; div: {num_diverged}; total: {num_total}\n'
        )

        # Pin diverged runs above every finite point so they stay visible.
        performance[diverged] = global_max * DIVERGENCE_MULTIPLIER

        # Horizontal jitter inside the lane keeps overlapping markers apart.
        jitter = np.random.uniform(-LANE_WIDTH, LANE_WIDTH, num_total)
        ax.scatter(x_offset + jitter, performance, marker='o',
                   facecolors='none', color=color)

        x_ticks.append(x_offset)
        x_labels.append(label)
        x_offset += ALG_WIDTH + 2 * LANE_WIDTH

    ax.xaxis.set_ticks(x_ticks)
    ax.set_xticklabels(x_labels)