Example #1
class UCBExample:
    """Experiment configuration: greedy, optimistic-greedy, epsilon-greedy
    and UCB agents competing on one shared 10-armed Gaussian bandit
    (Sutton & Barto section 2.6, per the label below)."""

    # Plot/figure title for this experiment.
    label = '2.6 - Upper-Confidence-Bound Action Selection'
    # Single 10-armed bandit shared by every agent in `agents`.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        # prior=10: optimistic initial estimates (cf. OptimisticInitialValueExample).
        Agent(bandit, GreedyPolicy(1), prior=10),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # UCBPolicy(2): 2 is presumably the exploration constant c — confirm in policy.
        Agent(bandit, UCBPolicy(2)),
    ]
Example #2
class OptimisticInitialValueExample:
    """Experiment configuration: the effect of optimistic initial value
    estimates (`prior`) on greedy agents, against an epsilon-greedy
    baseline, all on one shared 10-armed Gaussian bandit."""

    # Plot/figure title for this experiment.
    label = 'Optimistic Initial Values'
    # Single 10-armed bandit shared by every agent in `agents`.
    bandit = GaussianBandit(10)
    agents = [
        # Baseline: epsilon-greedy without an optimistic prior.
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
        # Greedy agents with increasingly optimistic priors (0 ... 10).
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, GreedyPolicy(1), prior=1),
        Agent(bandit, GreedyPolicy(1), prior=2),
        Agent(bandit, GreedyPolicy(1), prior=5),
        Agent(bandit, GreedyPolicy(1), prior=10),
    ]
Example #3
class EpsilonGreedyExample:
    """Experiment configuration: pure greedy vs. epsilon-greedy action
    selection (eps=0.01 and eps=0.1) on one shared 10-armed Gaussian
    bandit."""

    # Plot/figure title for this experiment.
    label = 'Action-Value Methods'
    # Single 10-armed bandit shared by every agent in `agents`.
    bandit = GaussianBandit(10)
    # Removed a stray `GreedyPolicy(1)` expression whose result was
    # discarded (leftover no-op), and the commented-out agent variants.
    agents = [
        Agent(bandit, GreedyPolicy(1)),
        Agent(bandit, EpsilonGreedyPolicy(0.01, 1)),
        Agent(bandit, EpsilonGreedyPolicy(0.1, 1)),
    ]
Example #4
class BinomialExample:
    """Experiment configuration: epsilon-greedy and UCB agents vs. a
    Bayesian BetaAgent on one shared 10-armed binomial bandit (n=5
    draws per pull, per the label)."""

    # Plot/figure title for this experiment.
    label = 'Bayesian Bandits - Binomial (n=5)'
    # 10-armed binomial bandit; t presumably pre-sizes the bandit for
    # 3 * 1000 total pulls — confirm against BinomialBandit.
    bandit = BinomialBandit(10, n=5, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # BetaAgent: Bayesian posterior-based agent, greedy over its beliefs.
        BetaAgent(bandit, GreedyPolicy())
    ]
Example #5
class BernoulliExample:
    """Experiment configuration: epsilon-greedy and UCB agents vs. a
    Bayesian BetaAgent on one shared 10-armed Bernoulli bandit."""

    # Plot/figure title for this experiment.
    label = 'Bayesian Bandits - Bernoulli'
    # 10-armed Bernoulli bandit; t presumably pre-sizes the bandit for
    # 3 * 1000 total pulls — confirm against BernoulliBandit.
    bandit = BernoulliBandit(10, t=3 * 1000)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # BetaAgent: Bayesian posterior-based agent, greedy over its beliefs.
        BetaAgent(bandit, GreedyPolicy())
    ]
Example #6
class EpsilonGreedyExample:
    """Experiment configuration: pure greedy vs. epsilon-greedy action
    selection on one shared 10-armed Gaussian bandit (Sutton & Barto
    section 2.2, per the label).

    NOTE(review): same class name as the EpsilonGreedyExample defined
    earlier in this file but with different policy arguments — these
    appear to be separate example snippets; confirm they live in
    separate modules.
    """

    # Plot/figure title for this experiment.
    label = '2.2 - Action-Value Methods'
    # Single 10-armed bandit shared by every agent in `agents`.
    bandit = GaussianBandit(10)
    agents = [
        Agent(bandit, GreedyPolicy()),
        Agent(bandit, EpsilonGreedyPolicy(0.01)),
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
    ]
Example #7
class CompareExample:
    """Experiment configuration comparing several agent/policy pairs,
    each playing its own dedicated bandit (unlike the other examples,
    which share one bandit), so different bandit types can be mixed."""

    # NOTE(review): 'Action' looks like a truncated title — confirm the
    # intended label before relying on plot output.
    label = 'Action'
    n_arms = 10
    n_trials = 1000
    # One bandit per agent, indexed positionally below. A comprehension
    # is deliberately not used: class-body comprehensions cannot see
    # class-level names such as n_arms.
    bandits = [GaussianBandit(n_arms), GaussianBandit(n_arms), GaussianBandit(n_arms), GaussianBandit(n_arms),
               GaussianBandit(n_arms, mu=4), BernoulliBandit(n_arms, t=3 * n_trials)]
    # Trailing comma added after the last agent (it was missing, which
    # risked silent expression concatenation if a line were appended);
    # commented-out dead variants removed.
    agents = [
        Agent(bandits[0], GreedyPolicy(1)),
        Agent(bandits[1], EpsilonGreedyPolicy(0.1, 1)),
        Agent(bandits[2], GreedyPolicy(1), prior=5),
        Agent(bandits[3], UCBPolicy(2)),
        GradientAgent(bandits[4], SoftmaxPolicy(), alpha=0.1, baseline=False),
        BetaAgent(bandits[5], GreedyPolicy()),
    ]
Example #8
"""
Takes advantage of multicore systems to speed up the simulation runs.
"""
from bandits.agent import Agent, BetaAgent
from bandits.bandit import BernoulliBandit
from bandits.policy import GreedyPolicy, EpsilonGreedyPolicy, UCBPolicy
from bandits.environment import Environment

if __name__ == '__main__':
    # Number of independent repetitions, and trials (pulls) per repetition.
    experiments = 500
    trials = 1000

    # Tie the bandit's t to `trials` instead of repeating the magic
    # number 1000 (the value is unchanged: 3 * trials == 3 * 1000).
    # t presumably pre-sizes the bandit's sample buffer — confirm
    # against BernoulliBandit.
    bandit = BernoulliBandit(10, t=3 * trials)
    agents = [
        Agent(bandit, EpsilonGreedyPolicy(0.1)),
        Agent(bandit, UCBPolicy(1)),
        # Bayesian posterior-based agent, greedy over its beliefs.
        BetaAgent(bandit, GreedyPolicy()),
    ]
    env = Environment(bandit, agents, label='Bayesian Bandits')
    # Run the experiment, then plot average scores / optimal-action rate
    # and the agents' final beliefs.
    scores, optimal = env.run(trials, experiments)
    env.plot_results(scores, optimal)
    env.plot_beliefs()