# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
from algorithms.softmax import Softmax
from arms.bernoulli import BernoulliArm
from tests.test_framework import test_algorithm

# Softmax bandit experiment on five Bernoulli arms; the last arm has the
# highest mean (0.9).
# NOTE(review): Softmax(0.1, 5) presumably takes (temperature, number of arms)
# -- confirm against algorithms.softmax.
algo = Softmax(0.1, 5)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
# Build a real list rather than using map(): on Python 3, map() returns a lazy
# one-shot iterator that has no len() and is exhausted after a single pass.
arms = [BernoulliArm(mu) for mu in means]
times, chosen_arms, rewards, cumulative_rewards = test_algorithm(algo, arms, 500)

# Accuracy of the Softmax algorithm: for every step, the running fraction of
# pulls so far that selected the best arm.
best_arm_index = means.index(max(means))  # arm with the highest mean
best_arms = [0.0] * len(times)
for t in times:
    # `t` is used as a 1-based step counter, so step t is stored at index t-1.
    hit = 1 if chosen_arms[t - 1] == best_arm_index else 0
    # No earlier steps exist at t == 1, so the previous running mean is 0.
    previous = best_arms[t - 2] if t > 1 else 0.0
    # Incremental mean: the old mean weighted by its (t-1) steps, plus this step.
    best_arms[t - 1] = 1.0 * (previous * (t - 1) + hit) / t
plt.subplot(221)
plt.plot(times, best_arms)
plt.grid()

# Performance of the Softmax algorithm
average_rewards = [0.0 for _ in range(len(times))]
for t in times:
# Example #2
0
# -*- coding:utf-8 -*-
import random
from algorithms.epsilon_greedy import EpsilonGreedy
from arms.bernoulli import BernoulliArm
from tests.test_framework import test_algorithm

# Fixed seed so the shuffled arm order (and hence the run) is reproducible.
random.seed(1)
means = [0.1, 0.1, 0.1, 0.1, 0.9]
random.shuffle(means)  # randomize which position holds the best arm
# Build a real list rather than using map(): on Python 3, map() returns a lazy
# one-shot iterator that has no len() and is exhausted after a single pass.
arms = [BernoulliArm(mu) for mu in means]

# Run the epsilon-greedy simulation and dump the results to demo.tsv, one
# tab-separated row per recorded step, prefixed with the epsilon value.
# NOTE(review): EpsilonGreedy(0.1, [], []) presumably takes
# (epsilon, counts, values), and test_algorithm(..., 5000, 250) presumably
# takes (num_sims, horizon) -- confirm against the project modules.
epsilon = 0.1
algo = EpsilonGreedy(epsilon, [], [])
results = test_algorithm(algo, arms, 5000, 250)
# Open the file only once the results exist, and let `with` close it even if
# a write fails.
with open('demo.tsv', 'w') as f:
    # zip(*results) transposes the parallel result sequences into rows; it
    # assumes they all have the same length (zip stops at the shortest).
    for row in zip(*results):
        f.write(str(epsilon) + '\t' + '\t'.join(str(value) for value in row) + '\n')