def averaged_runs(µ, σ, ε, selector, provider_factory, initializer, dont_average_reward):
    """Run ctx().num_runs independent bandit experiments and average the results.

    Parameters
    ----------
    µ, σ : reward-distribution parameters forwarded to Experiment.
    ε : exploration rate forwarded to Experiment.
    selector : action-selection strategy instance.
    provider_factory : factory for the action-values provider.
    initializer : initial action-value estimates provider.
    dont_average_reward : flag forwarded to Experiment
        (presumably constant step size vs sample average — confirm in Experiment).

    Returns
    -------
    (rewards, percent_optimal_actions) : two 1-D arrays, one value per pull,
    each averaged across all runs (percent scaled to 0–100).
    """
    exs = [
        Experiment(µ, σ, ε, ctx().num_arms, selector, provider_factory,
                   initializer, dont_average_reward)
        for _ in range(ctx().num_runs)
    ]
    # run() is called for its side effects, so use a plain loop rather than
    # a throwaway list comprehension.
    for ex in tqdm(exs):
        ex.run(ctx().num_pulls_per_run)
    # Stack the per-run series as columns, then average across runs (axis=1).
    rewards = np.average(np.stack([ex.rewards for ex in exs], axis=1), axis=1)
    percent_optimal_actions = 100 * np.average(
        np.stack([ex.optimality for ex in exs], axis=1), axis=1)
    return rewards, percent_optimal_actions
def run_experiment():
    """Run averaged_runs once per configured setting and collect the results.

    Iterates the parallel configuration lists in ctx() (ms, σs, εs,
    selectors, providers, initializers) in lockstep; these are assumed to
    be of equal length (the original code indexed all of them by
    range(len(ctx().εs))).

    Returns
    -------
    (rewards, percent_optimals) : two lists, one entry per configuration,
    each entry the averaged per-step series returned by averaged_runs.
    """
    configs = zip(ctx().ms, ctx().σs, ctx().εs,
                  ctx().selectors, ctx().providers, ctx().initializers)
    results = [
        averaged_runs(µ, σ, ε,
                      selector(ε, ctx().num_arms),  # selector is a factory taking (ε, num_arms)
                      provider,
                      initializer(),                # initializer is a zero-arg factory
                      ctx().dont_average_reward)
        for µ, σ, ε, selector, provider, initializer in configs
    ]
    rewards = [r[0] for r in results]
    percent_optimals = [r[1] for r in results]
    return rewards, percent_optimals
from munch import munchify %matplotlib inline #─────────────────────────────────────────────────────────────────────── # Experiment Start #─────────────────────────────────────────────────────────────────────── use_context(1) before = t.perf_counter() rewards, percent_optimals = run_experiment() print("time (seconds): {0}".format(t.perf_counter() - before)) #─────────────────────────────────────────────────────────────────────── # Plotting #─────────────────────────────────────────────────────────────────────── labels = ["ε={0}{1}".format(ε, (" (greedy)" if ε == 0 else "")) for ε in ctx().εs] pp.figure(figsize=(10,4)) [pp.plot(rewards[i], label=labels[i]) for i in range(len(ctx().εs))] pp.legend(bbox_to_anchor=(1.2, 0.5)) pp.xlabel("Steps") pp.ylabel("Average Reward") pp.title("Average ε-greedy Rewards over " + str(ctx().num_runs) + " Runs") pp.show() pp.figure(figsize=(10,4)) [pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))] pp.legend(bbox_to_anchor=(1.2, 0.5)) pp.xlabel("Steps") pp.ylabel("% Optimal Action") pp.title("% times optical action selected averaged over " + str(ctx().num_runs) + " Runs") pp.show()
from BanditMachine import BanditMachine from Experiment import run_experiment from Insanity.Initializers import OptimisticInitializer, ZeroInitializer from ActionValuesProvider import ActionValuesProvider from Context import ctx, use_context from munch import munchify %matplotlib inline #─────────────────────────────────────────────────────────────────────── # Experiment Start #─────────────────────────────────────────────────────────────────────── use_context(2) before = t.perf_counter() rewards, percent_optimals = run_experiment() print("time (seconds): {0}".format(t.perf_counter() - before)) #─────────────────────────────────────────────────────────────────────── # Plotting #─────────────────────────────────────────────────────────────────────── labels = ["ε={0}{1}".format(ε, (" (greedy, optimistic, Q₁=5), " if ε == 0 else " realistic, Q₁=0")) for ε in ctx().εs] pp.figure(figsize=(10,4)) [pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))] pp.legend(bbox_to_anchor=(1.2, 0.5)) pp.xlabel("Steps") pp.ylabel("% Optimal Action") pp.title("% times optical action selected averaged over " + str(ctx().num_runs) + " Runs") pp.show()
import numpy as np from tqdm import tqdm from BanditMachine import BanditMachine from Experiment import run_experiment from Insanity.Initializers import OptimisticInitializer, ZeroInitializer from ActionValuesProvider import ActionValuesProvider from Context import ctx, use_context from munch import munchify %matplotlib inline #─────────────────────────────────────────────────────────────────────── # Experiment Start #─────────────────────────────────────────────────────────────────────── use_context(4) before = t.perf_counter() rewards, percent_optimals = run_experiment() print("time (seconds): {0}".format(t.perf_counter() - before)) #─────────────────────────────────────────────────────────────────────── # Plotting #─────────────────────────────────────────────────────────────────────── labels = ["α={0}{1}".format(α, " (no baseline)" if i > 1 else "") for i,α in enumerate([0.1, 0.4, 0.1, 0.4])] pp.figure(figsize=(10,4)) [pp.plot(percent_optimals[i], label=labels[i]) for i in range(len(ctx().εs))] pp.legend(bbox_to_anchor=(1.2, 0.5)) pp.xlabel("Steps") pp.ylabel("% Optimal Action") pp.title("Gradient Bandit (averaged over " + str(ctx().num_runs) + " Runs)") pp.show()