def get_config():
    """Generates the config for the experiment."""
    name = 'graph_correlated_sanity'
    n_stages = 20
    mu0 = -0.5
    sigma0 = 1
    sigma_tilde = 1
    step_count = 100
    step_size = 0.01

    agents = collections.OrderedDict([
        ('Langevin TS',
         functools.partial(CorrelatedBBLangevin, n_stages, mu0, sigma0,
                           sigma_tilde, step_count, step_size)),
        ('TS',
         functools.partial(CorrelatedBBTS, n_stages, mu0, sigma0,
                           sigma_tilde)),
        ('Gibbs TS',
         functools.partial(GibbsCorrelatedBB, n_stages, mu0, sigma0,
                           sigma_tilde)),
        ('bootstrap TS',
         functools.partial(BootstrapCorrelatedBB, n_stages, mu0, sigma0,
                           sigma_tilde))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(CorrelatedBinomialBridge, n_stages, mu0, sigma0,
                           sigma_tilde))
    ])
    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 500
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
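Every snippet on this page builds the same kind of Config object, so it helps to see how one is consumed. Below is a minimal sketch under the assumption that Config is a plain six-field container and that agents and environments follow a pick-action/update-observation interface; ToyAgent, ToyEnv, and the runner loop are illustrative stand-ins, not part of the original codebase.

import collections
import functools

# Assumed shape of Config: a plain container for the six fields used above.
Config = collections.namedtuple(
    'Config',
    ['name', 'agents', 'environments', 'experiments', 'n_steps', 'n_seeds'])

class ToyEnv:
    """Stand-in environment: arm 0 always pays 1, other arms pay 0."""
    def get_reward(self, action):
        return float(action == 0)

class ToyAgent:
    """Stand-in agent that always pulls arm 0."""
    def __init__(self, n_arm):
        self.n_arm = n_arm
    def pick_action(self):
        return 0
    def update_observation(self, action, reward):
        pass

agents = collections.OrderedDict([('toy', functools.partial(ToyAgent, 3))])
environments = collections.OrderedDict([('env', ToyEnv)])
config = Config('toy', agents, environments, collections.OrderedDict(),
                n_steps=10, n_seeds=2)

# A runner instantiates a fresh agent/environment pair per (agent, env, seed)
# and plays n_steps rounds; the partials above make the constructors zero-arg.
for agent_name, agent_ctor in config.agents.items():
    for env_name, env_ctor in config.environments.items():
        agent, env = agent_ctor(), env_ctor()
        for _ in range(config.n_steps):
            action = agent.pick_action()
            reward = env.get_reward(action)
            agent.update_observation(action, reward)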
Example #2
def get_config():
    """Generates the config for the experiment."""
    name = 'finite_misspecified'
    n_arm = 3
    true_prior_success = [1, 1, 1]
    informative_prior_failure = [100, 100, 100]
    true_prior_failure = [50, 100, 200]

    def _correct_ts_init(n_arm):
        assert n_arm == 3  # ad-hoc helper for this experiment
        agent = FiniteBernoulliBanditTS(n_arm)
        agent.set_prior(true_prior_success, informative_prior_failure)
        return agent

    agents = collections.OrderedDict([
        ('correct_ts', functools.partial(_correct_ts_init, n_arm)),
        ('misspecified_ts', functools.partial(FiniteBernoulliBanditTS, n_arm))
    ])

    def _env_init(n_arm):
        environment = DriftingFiniteArmedBernoulliBandit(n_arm, gamma=0.0)
        environment.set_prior(true_prior_success, true_prior_failure)
        return environment

    environments = collections.OrderedDict([
        ('env', functools.partial(_env_init, n_arm))
    ])
    experiments = collections.OrderedDict([(name, ExperimentWithMean)])
    n_steps = 1000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
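The point of this config is prior sensitivity: correct_ts starts from the Beta priors the environment is actually drawn from, while misspecified_ts keeps the agent's defaults. A minimal sketch of the Beta-Bernoulli Thompson sampling step this relies on follows; the internals of FiniteBernoulliBanditTS are not shown on this page, so treat this as an assumption about how set_prior feeds into sampling.

import numpy as np

rng = np.random.default_rng(0)

# Informative prior from the config: each arm starts at Beta(1, 100), i.e. a
# prior mean of 1/101 for its success probability.
prior_success = np.array([1., 1., 1.])
prior_failure = np.array([100., 100., 100.])

def pick_action():
    # Thompson sampling: draw one sample per arm from its Beta posterior and
    # play the arm with the largest sample.
    samples = rng.beta(prior_success, prior_failure)
    return int(np.argmax(samples))

def update_observation(action, reward):
    # Conjugate Beta-Bernoulli update.
    if reward:
        prior_success[action] += 1
    else:
        prior_failure[action] += 1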
Example #3
def get_config():
  """Generates the config for the experiment."""
  name = 'dynamic_pricing'
  num_products = 5
  scale = 1
  noise_var = 10
  p_max = 1

  agents = collections.OrderedDict(
      [('bsPricing',
        functools.partial(BootstrapDynamicPricing,
                          num_products, scale, noise_var, p_max))]
  )

  environments = collections.OrderedDict(
      [('env',
        functools.partial(DynamicPricing,
                          num_products, scale, noise_var, p_max))]
  )
  experiments = collections.OrderedDict(
      [(name, ExperimentNoAction)]
  )
  n_steps = 80
  n_seeds = 2000
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config
Example #4
def get_config():
    """Generates the config for the experiment."""
    name = 'graph_indep_binary'
    n_stages = 20
    shape = 2
    scale = 0.5
    tol = 0.1
    alpha = 0.2
    beta = 0.5

    agents = collections.OrderedDict([
        ('Bootstrap',
         functools.partial(BootstrapIndependentBBWithBinaryReward, n_stages,
                           shape, scale, tol, alpha, beta)),
        ('Laplace',
         functools.partial(LaplaceIndependentBBWithBinaryReward, n_stages,
                           shape, scale, tol, alpha, beta))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(IndependentBinomialBridgeWithBinaryReward, n_stages,
                           shape, scale))
    ])
    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 500
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
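Several configs on this page pair a 'Bootstrap' agent against a 'Laplace' one. As a generic illustration of the idea (not the actual BootstrapIndependentBBWithBinaryReward internals), the bootstrap approach replaces posterior sampling with a resample of the observed data:

import numpy as np

rng = np.random.default_rng(0)

def bootstrap_estimate(observations):
    # Resample the data with replacement and return the mean of the resample;
    # acting greedily on this randomized estimate mimics posterior sampling.
    boot = rng.choice(observations, size=len(observations), replace=True)
    return boot.mean()

rewards_for_one_edge = np.array([0., 1., 1., 0., 1.])
print(bootstrap_estimate(rewards_for_one_edge))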
Example #5
def get_config():
    """Generates the config for the experiment."""
    name = 'graph_correlated'
    n_stages = 20
    mu0 = -0.5
    sigma0 = 1
    sigma_tilde = 1

    agents = collections.OrderedDict([
        ('coherent TS',
         functools.partial(CorrelatedBBTS, n_stages, mu0, sigma0,
                           sigma_tilde)),
        ('misspecified TS',
         functools.partial(IndependentBBTS, n_stages, mu0, sigma0,
                           sigma_tilde))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(CorrelatedBinomialBridge, n_stages, mu0, sigma0,
                           sigma_tilde))
    ])
    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 500
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #6
def get_config():
    """Generates the config for the experiment."""
    name = 'graph_indep_concurrent'
    n_stages = 20
    mu0 = -0.5
    sigma0 = 1
    sigma_tilde = 1
    num_agents = [1, 10, 20, 50, 100]

    agents_list = []
    for num_agent in num_agents:
        agents_list.append(
            ('K = ' + str(num_agent),
             functools.partial(IndependentBBMultipleTS, n_stages, mu0, sigma0,
                               sigma_tilde, num_agent)))

    agents = collections.OrderedDict(agents_list)

    environments = collections.OrderedDict([
        ('env',
         functools.partial(MultiAgentCorrelatedBinomialBridge, n_stages, mu0,
                           sigma0, sigma_tilde))
    ])
    experiments = collections.OrderedDict([(name, ExperimentMultipleAgents)])
    n_steps = 100
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #7
def get_config():
    """Generates the config for the experiment."""
    name = 'finite_simple_sanity'
    n_arm = 3
    step_size = 0.01
    step_count = 100
    agents = collections.OrderedDict([
        ('Laplace TS', functools.partial(FiniteBernoulliBanditLaplace, n_arm)),
        (
            'Langevin TS',
            functools.partial(FiniteBernoulliBanditLangevin, n_arm, step_count,
                              step_size),
        ),
        ('bootstrap TS',
         functools.partial(FiniteBernoulliBanditBootstrap, n_arm)),
        ('TS', functools.partial(FiniteBernoulliBanditTS, n_arm))
    ])
    environments = collections.OrderedDict()
    n_env = 100
    for env in range(n_env):
        probs = np.random.rand(n_arm)
        environments[env] = functools.partial(FiniteArmedBernoulliBandit,
                                              probs)

    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 100
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
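Note that the arm probabilities in this config are drawn when get_config() runs, so all seeds share the same 100 sampled environments, and two calls to get_config() produce different ones. A small sketch of an optional tweak (not in the original) that pins them down:

import numpy as np

# Seed the global generator before sampling so the 100 environments are
# reproducible across runs of get_config().
np.random.seed(0)
n_arm, n_env = 3, 100
env_probs = {env: np.random.rand(n_arm) for env in range(n_env)}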
Example #8
def get_config():
    """Generates the config for the experiment."""
    name = 'logistic'
    num_articles = 3
    dim = 7
    theta_mean = 0
    theta_std = 1
    epsilon1 = 0.01
    epsilon2 = 0.05
    batch_size = 50
    step_count = 200
    step_size = 1 / 200

    alpha = 0.2
    beta = 0.5
    tol = 0.0001

    agents = collections.OrderedDict([
        # ('greedy',
        #  functools.partial(GreedyLogisticBandit, num_articles, dim,
        #                    theta_mean, theta_std, epsilon1, alpha, beta, tol)),
        ('OSAGA-LD TS',
         functools.partial(OSAGALDTSLogisticBandit, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol,
                           batch_size, step_count, step_size)),
        ('SAGA-LD TS',
         functools.partial(SAGALDTSLogisticBandit, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol,
                           batch_size, step_count, step_size)),
        ('Langevin TS',
         functools.partial(LangevinTSLogisticBandit, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol,
                           batch_size, step_count, step_size)),
        # (str(epsilon1) + '-greedy',
        #  functools.partial(EpsilonGreedyLogisticBandit, num_articles, dim,
        #                    theta_mean, theta_std, epsilon1, alpha, beta, tol)),
        # (str(epsilon2) + '-greedy',
        #  functools.partial(EpsilonGreedyLogisticBandit, num_articles, dim,
        #                    theta_mean, theta_std, epsilon2, alpha, beta, tol)),
        ('Laplace TS',
         functools.partial(LaplaceTSLogisticBandit, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(LogisticBandit, num_articles, dim, None, None))
        # ('env',
        #  functools.partial(LogisticBandit, num_articles, dim, theta_mean,
        #                    theta_std))
    ])

    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 5000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #9
def get_config():
    """Generates the config for the experiment."""
    name = 'cascade'
    num_items = 50
    num_positions = 10
    true_a0 = 1
    true_b0 = 40
    best_optimism = 0.1

    def _ts_init(num_items, num_positions):
        agent = CascadingBanditTS(num_items,
                                  num_positions,
                                  a0=true_a0,
                                  b0=true_b0)
        return agent

    def _ucb1_init(num_items, num_positions):
        agent = CascadingBanditUCB1(num_items,
                                    num_positions,
                                    a0=true_a0,
                                    b0=true_b0,
                                    optimism=1)
        return agent

    def _ucb_best_init(num_items, num_positions):
        agent = CascadingBanditUCB1(num_items,
                                    num_positions,
                                    a0=true_a0,
                                    b0=true_b0,
                                    optimism=best_optimism)
        return agent

    agents = collections.OrderedDict([
        ('ts', functools.partial(_ts_init, num_items, num_positions)),
        ('ucb1', functools.partial(_ucb1_init, num_items, num_positions)),
        ('ucb-best', functools.partial(_ucb_best_init, num_items,
                                       num_positions))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(CascadingBandit, num_items, num_positions, true_a0,
                           true_b0))
    ])

    # Very large experiment, so log less frequently to keep the output file a
    # sensible size.
    experiments = collections.OrderedDict([
        (name, functools.partial(ExperimentNoAction, rec_freq=10))
    ])
    n_steps = 5000
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
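The rec_freq=10 argument thins the log so a 5000-step run does not record every step. ExperimentNoAction is not shown on this page, so the following is only a sketch of the assumed recording behavior:

rec_freq = 10
n_steps = 5000
results = []
for t in range(1, n_steps + 1):
    instant_regret = 0.0  # placeholder for the per-step regret computation
    if t % rec_freq == 0:
        # Only every rec_freq-th step is written out, shrinking the log 10x.
        results.append({'t': t, 'instant_regret': instant_regret})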
Example #10
def get_config():
    """Generates the config for the experiment."""
    name = 'graph_indep'
    n_stages = 20
    mu0 = -0.5
    sigma0 = 1
    sigma_tilde = 1

    agents = collections.OrderedDict([
        ('ts',
         functools.partial(IndependentBBTS, n_stages, mu0, sigma0,
                           sigma_tilde)),
        ('greedy',
         functools.partial(IndependentBBEpsilonGreedy,
                           n_stages,
                           mu0,
                           sigma0,
                           sigma_tilde,
                           epsilon=0.0)),
        ('0.01-greedy',
         functools.partial(IndependentBBEpsilonGreedy,
                           n_stages,
                           mu0,
                           sigma0,
                           sigma_tilde,
                           epsilon=0.01)),
        ('0.05-greedy',
         functools.partial(IndependentBBEpsilonGreedy,
                           n_stages,
                           mu0,
                           sigma0,
                           sigma_tilde,
                           epsilon=0.05)),
        ('0.1-greedy',
         functools.partial(IndependentBBEpsilonGreedy,
                           n_stages,
                           mu0,
                           sigma0,
                           sigma_tilde,
                           epsilon=0.1))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(IndependentBinomialBridge, n_stages, mu0, sigma0,
                           sigma_tilde))
    ])
    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 500
    n_seeds = 1000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
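This config sweeps the exploration rate, with epsilon=0.0 serving as the pure greedy baseline. A generic sketch of the epsilon-greedy rule those labels refer to (the actual IndependentBBEpsilonGreedy operates on paths through the binomial bridge rather than a flat action set):

import numpy as np

def epsilon_greedy(values, epsilon, rng):
    # With probability epsilon pick a uniformly random action; otherwise act
    # greedily on the current value estimates. epsilon=0 is pure greedy.
    if rng.random() < epsilon:
        return int(rng.integers(len(values)))
    return int(np.argmax(values))

rng = np.random.default_rng(0)
print(epsilon_greedy(np.array([0.1, 0.7, 0.3]), epsilon=0.05, rng=rng))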
Example #11
def get_config():
    """Generates the config for the experiment."""
    name = 'news_recommendation'
    num_articles = 3
    dim = 7
    theta_mean = 0
    theta_std = 1
    epsilon1 = 0.01
    epsilon2 = 0.05
    batch_size = 50
    step_count = 200
    step_size = 1 / 200

    alpha = 0.2
    beta = 0.5
    tol = 0.0001

    agents = collections.OrderedDict([
        ('greedy',
         functools.partial(GreedyNewsRecommendation, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol)),
        ('Langevin TS',
         functools.partial(LangevinTSNewsRecommendation, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol,
                           batch_size, step_count, step_size)),
        (str(epsilon1) + '-greedy',
         functools.partial(EpsilonGreedyNewsRecommendation, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol)),
        (str(epsilon2) + '-greedy',
         functools.partial(EpsilonGreedyNewsRecommendation, num_articles, dim,
                           theta_mean, theta_std, epsilon2, alpha, beta, tol)),
        ('Laplace TS',
         functools.partial(LaplaceTSNewsRecommendation, num_articles, dim,
                           theta_mean, theta_std, epsilon1, alpha, beta, tol))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(NewsRecommendation, num_articles, dim, theta_mean,
                           theta_std))
    ])

    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 5000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #12
def get_config():
  """Generates the config for the experiment."""
  name = 'graph_indep_binary_new'
  n_stages = 20
  shape = 2
  scale = 0.5
  tol = 0.001
  alpha = 0.2
  beta = 0.5
  langevin_batch_size = 100
  langevin_step_count = 200
  langevin_step_size = 0.0005
  epsilon = 0
  
  agents = collections.OrderedDict(
      [('greedy',  # epsilon=0, so the epsilon-greedy agent acts purely greedily
        functools.partial(EpsilonGreedyIndependentBBWithBinaryReward,
                          n_stages, epsilon, shape, scale, tol, alpha, beta))])

  # Alternative agents for this experiment:
  # agents = collections.OrderedDict(
  #     [('Langevin TS',
  #       functools.partial(StochasticLangevinMCMCIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta,
  #                         langevin_batch_size, langevin_step_count,
  #                         langevin_step_size)),
  #      ('bootstrap TS',
  #       functools.partial(BootstrapIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta)),
  #      ('Laplace TS',
  #       functools.partial(LaplaceIndependentBBWithBinaryReward,
  #                         n_stages, shape, scale, tol, alpha, beta))])

  environments = collections.OrderedDict(
      [('env',
        functools.partial(IndependentBinomialBridgeWithBinaryReward,
                          n_stages, shape, scale))]
  )
  experiments = collections.OrderedDict(
      [(name, ExperimentNoAction)]
  )
  n_steps = 500
  n_seeds = 1000
  config = Config(name, agents, environments, experiments, n_steps, n_seeds)
  return config
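The langevin_* parameters here (and step_count/step_size in the earlier configs) govern an MCMC inner loop. As a generic sketch of unadjusted Langevin dynamics, which 'Langevin TS' agents use to draw approximate posterior samples; grad_log_posterior is a placeholder for the model-specific gradient:

import numpy as np

def langevin_sample(theta0, grad_log_posterior, step_count, step_size, rng):
    # Unadjusted Langevin dynamics: gradient ascent on the log posterior plus
    # Gaussian noise scaled by sqrt(step_size).
    theta = np.array(theta0, dtype=float)
    for _ in range(step_count):
        noise = rng.standard_normal(theta.shape)
        theta += 0.5 * step_size * grad_log_posterior(theta)
        theta += np.sqrt(step_size) * noise
    return theta

# Toy usage: sample from a standard normal, whose grad log density is -theta.
rng = np.random.default_rng(0)
sample = langevin_sample(np.zeros(3), lambda th: -th, 200, 0.0005, rng)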
Example #13
def get_config():
    """Generates the config for the experiment."""
    name = 'finite_drift'
    n_arm = 3
    agents = collections.OrderedDict([
        ('stationary_ts', functools.partial(FiniteBernoulliBanditTS, n_arm)),
        ('nonstationary_ts',
         functools.partial(DriftingFiniteBernoulliBanditTS, n_arm))
    ])

    environments = collections.OrderedDict([
        ('env', functools.partial(DriftingFiniteArmedBernoulliBandit, n_arm))
    ])
    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #14
def get_config():
    """Generates the config for the experiment."""
    name = 'finite_simple'
    n_arm = 3
    agents = collections.OrderedDict([
        ('greedy', functools.partial(FiniteBernoulliBanditEpsilonGreedy,
                                     n_arm)),
        ('ts', functools.partial(FiniteBernoulliBanditTS, n_arm))
    ])
    probs = [0.7, 0.8, 0.9]
    environments = collections.OrderedDict([
        ('env', functools.partial(FiniteArmedBernoulliBandit, probs))
    ])
    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #15
def get_config():
    """Generates the config for the experiment."""
    name = 'product_assortment'
    num_products = 6
    prior_mean = 0
    prior_var_diagonal = 1
    prior_var_off_diagonal = 0.2
    noise_var = 0.04
    profits = np.array([1 / 6] * 6)
    epsilon = 0.07
    k = 9

    agents = collections.OrderedDict([
        ('TS',
         functools.partial(TSAssortment, num_products, prior_mean,
                           prior_var_diagonal, prior_var_off_diagonal,
                           noise_var, profits, epsilon, k)),
        ('greedy',
         functools.partial(GreedyAssortment, num_products, prior_mean,
                           prior_var_diagonal, prior_var_off_diagonal,
                           noise_var, profits, epsilon, k)),
        (str(epsilon) + '-greedy',
         functools.partial(EpsilonGreedyAssortment, num_products, prior_mean,
                           prior_var_diagonal, prior_var_off_diagonal,
                           noise_var, profits, epsilon, k)),
        (str(k) + '/(' + str(k) + '+t)-greedy',
         functools.partial(AnnealingEpsilonGreedyAssortment, num_products,
                           prior_mean, prior_var_diagonal,
                           prior_var_off_diagonal, noise_var, profits, epsilon,
                           k))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(ProductAssortment, num_products, prior_mean,
                           prior_var_diagonal, prior_var_off_diagonal,
                           noise_var, profits))
    ])
    experiments = collections.OrderedDict([(name, ExperimentNoAction)])
    n_steps = 500
    n_seeds = 20000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
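The last agent's label encodes its annealing schedule: exploration decays as epsilon_t = k/(k+t), starting near 1 and vanishing as t grows. A one-line check of the schedule for the k=9 used here:

k = 9
epsilons = [k / (k + t) for t in (0, 100, 200, 300, 400)]
# [1.0, 0.0825..., 0.0430..., 0.0291..., 0.0220...]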
Example #16
def get_config():
    """Generates the config for the experiment."""
    name = 'cascade'
    num_items = 50
    num_positions = 10
    true_a0 = 1
    true_b0 = 10

    def _correct_ts_init(num_items, num_positions):
        agent = CascadingBanditTS(num_items,
                                  num_positions,
                                  a0=true_a0,
                                  b0=true_b0)
        return agent

    agents = collections.OrderedDict([
        ('correct_ts',
         functools.partial(_correct_ts_init, num_items, num_positions)),
        ('misspecified_ts',
         functools.partial(CascadingBanditTS, num_items, num_positions)),
        ('ucb1',
         functools.partial(CascadingBanditUCB1, num_items, num_positions)),
        ('kl_ucb',
         functools.partial(CascadingBanditKLUCB, num_items, num_positions))
    ])

    environments = collections.OrderedDict([
        ('env',
         functools.partial(CascadingBandit, num_items, num_positions, true_a0,
                           true_b0))
    ])

    # Very large experiment, so log less frequently to keep the output file a
    # sensible size.
    experiments = collections.OrderedDict([
        (name, functools.partial(ExperimentNoAction, rec_freq=10))
    ])
    n_steps = 5000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #17
def get_config():
    """Generates the config for the experiment."""
    name = 'finite_simple_rand'
    n_arm = 3
    agents = collections.OrderedDict([
        ('greedy', functools.partial(FiniteBernoulliBanditEpsilonGreedy,
                                     n_arm)),
        ('ts', functools.partial(FiniteBernoulliBanditTS, n_arm))
    ])

    environments = collections.OrderedDict()
    n_env = 100
    for env in range(n_env):
        probs = np.random.rand(n_arm)
        environments[env] = functools.partial(FiniteArmedBernoulliBandit,
                                              probs)

    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 100
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #18
def get_config():
    """Generates the config for the experiment."""
    name = 'rl_bbq'
    #   true_theta = [0.9, 0.1]
    # Sample a 100-dim theta with the first two entries non-zero and unit
    # l2-norm.
    unit_circle_angle = np.random.uniform(0, 2 * np.pi)
    true_theta = [np.cos(unit_circle_angle),
                  np.sin(unit_circle_angle)] + [0] * 98
    kappa_1 = 0.4
    kappa_2 = 0.5
    # Namespace used as a plain attribute container; see
    # https://stackoverflow.com/questions/16878315/what-is-the-right-way-to-treat-python-argparse-namespace-as-a-dictionary
    args = argparse.Namespace()
    args.n_feat = len(true_theta)
    args.optim = 'sgd'
    args.learn_rate = 1e-2
    args.momentum = 0.9  # only for SGD
    args.gamma = 1  # discount factor
    args.sample_cost = 2  # cost must be >= 0
    args.in_dim = 100  # input dim of policy
    args.n_act = 2  # num actions
    agents = collections.OrderedDict([
        ('bbq_k4', functools.partial(SelectiveSampleBBQ, args.n_feat,
                                     kappa_1)),
        ('bbq_k5', functools.partial(SelectiveSampleBBQ, args.n_feat,
                                     kappa_2)),
        ('reinf', functools.partial(PolicyGradientREINFORCE, PolicyNN, args))
    ])
    environments = collections.OrderedDict([
        ('env',
         functools.partial(ContextualBanditFunctionalContext, uniform_iid,
                           true_theta, linear_classifier))
    ])
    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
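A quick sanity check on the theta construction above: since cos^2 + sin^2 = 1, the 100-dimensional parameter has unit l2-norm with only its first two entries non-zero.

import numpy as np

angle = np.random.uniform(0, 2 * np.pi)
theta = np.array([np.cos(angle), np.sin(angle)] + [0.0] * 98)
assert theta.shape == (100,)
assert np.isclose(np.linalg.norm(theta), 1.0)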
Example #19
def get_config():
    """Generates the config for the experiment."""
    name = 'bbq'
    true_theta = [1.0, 0.5]
    #   unit_circle_angle = np.random.uniform(0, 2*np.pi) # sample 100-dim theta with first two non-zero and l2-norm theta=1
    #   true_theta = [np.cos(unit_circle_angle), np.sin(unit_circle_angle)] + [0]*98
    kappa_1 = 0.25
    kappa_2 = 0.3
    n_feat = len(true_theta)
    agents = collections.OrderedDict([
        ('bbq_k25', functools.partial(SelectiveSampleBBQ, n_feat, kappa_1)),
        ('bbq_k3', functools.partial(SelectiveSampleBBQ, n_feat, kappa_2))
    ])
    environments = collections.OrderedDict([
        ('env',
         functools.partial(ContextualBanditFunctionalContext, normal_iid,
                           true_theta, linear_classifier))
        # Alternative: uniform_iid contexts.
        # ('env',
        #  functools.partial(ContextualBanditFunctionalContext, uniform_iid,
        #                    true_theta, linear_classifier))
    ])
    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 1000
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
Example #20
def get_config():
    """Generates the config for the experiment."""
    name = 'ensemble_nn'

    input_dim = 100
    hidden_dim = 50
    num_actions = 100
    prior_var = 1.
    noise_var = 100.

    # We have to do something weird since ensemble_nn requires construction of
    # the environment before the agent in order to specify the actions.
    agents = collections.OrderedDict([
        ('epsilon=0.01',
         lambda: functools.partial(EpsilonGreedy, epsilon_param=0.01)),
        ('epsilon=0.05',
         lambda: functools.partial(EpsilonGreedy, epsilon_param=0.05)),
        ('epsilon=0.1',
         lambda: functools.partial(EpsilonGreedy, epsilon_param=0.1)),
        ('epsilon=0.2',
         lambda: functools.partial(EpsilonGreedy, epsilon_param=0.2)),
        ('epsilon=0.3',
         lambda: functools.partial(EpsilonGreedy, epsilon_param=0.3)),
        ('epsilon=10/(10+t)',
         lambda: functools.partial(EpsilonAnnealing, epsilon_param=10.)),
        ('epsilon=20/(20+t)',
         lambda: functools.partial(EpsilonAnnealing, epsilon_param=20.)),
        ('epsilon=30/(30+t)',
         lambda: functools.partial(EpsilonAnnealing, epsilon_param=30.)),
        ('epsilon=40/(40+t)',
         lambda: functools.partial(EpsilonAnnealing, epsilon_param=40.)),
        ('epsilon=50/(50+t)',
         lambda: functools.partial(EpsilonAnnealing, epsilon_param=50.)),
        ('ensemble=3',
         lambda: functools.partial(EnsembleSampling, num_models=3)),
        ('ensemble=10',
         lambda: functools.partial(EnsembleSampling, num_models=10)),
        ('ensemble=30',
         lambda: functools.partial(EnsembleSampling, num_models=30)),
        ('ensemble=100',
         lambda: functools.partial(EnsembleSampling, num_models=100)),
        ('ensemble=300',
         lambda: functools.partial(EnsembleSampling, num_models=300)),
        ('dropout=0.1',
         lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.1)),
        ('dropout=0.25',
         lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.25)),
        ('dropout=0.5',
         lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.5)),
        ('dropout=0.75',
         lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.75)),
        ('dropout=0.9',
         lambda: functools.partial(TwoLayerNNDropout, drop_prob=0.9))
    ])

    # Similarly, we store the environment as a deferred constructor since we
    # need a custom experiment function.
    def _custom_partial_nn():
        f = functools.partial(TwoLayerNNBandit, input_dim, hidden_dim,
                              num_actions, prior_var, noise_var)
        return f

    environments = collections.OrderedDict([('env', _custom_partial_nn)])

    n_steps = 1000
    n_seeds = 1000

    def _experiment_constructor(agent_lambda, env_lambda, n_steps, seed,
                                unique_id):
        """Constructor for neural network experiments.

        This is more involved than other configs since constructing the agents
        requires specification of the environment. We could/should improve on
        this with a code refactor, but leaving it for now since it works.
        """
        environment = env_lambda(seed=seed)
        actions = environment.get_actions()
        agent = agent_lambda(input_dim, hidden_dim, actions, n_steps,
                             prior_var, noise_var)
        experiment = ExperimentNoAction(agent,
                                        environment,
                                        n_steps,
                                        seed,
                                        unique_id=unique_id)
        return experiment

    experiments = collections.OrderedDict([(name, _experiment_constructor)])

    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config
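The extra lambda wrapping each agent above exists because the agents need the environment's sampled actions before they can be built, so the config can only hand out recipes. A toy sketch of the same two-stage pattern (ToyEnvWithActions and ToyActionAgent are illustrative stand-ins):

import functools

class ToyEnvWithActions:
    def __init__(self, seed=0):
        self.seed = seed
    def get_actions(self):
        return ['left', 'right']

class ToyActionAgent:
    def __init__(self, actions, epsilon_param):
        self.actions = actions
        self.epsilon_param = epsilon_param

# Stage 1 (config time): bind only the variant hyper-parameter.
agent_lambda = lambda: functools.partial(ToyActionAgent, epsilon_param=0.1)

# Stage 2 (experiment construction): the runner evaluates the lambda, builds
# the environment, and supplies the environment-derived arguments.
env = ToyEnvWithActions(seed=42)
agent = agent_lambda()(env.get_actions())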
Example #21
def get_config():
    """Generates the config for the experiment."""
    #   name = 'rl_reinf_ac_bbq'
    name = 'rl_reinf_ac_feat_bbq_uniform'
    #   name = 'rl_reinf_ac_bbq_uniform'
    #   name = 'rl_reinf_ac_bbq_adult'
    #   true_theta = [1.0, 0.5]
    #   kappa_1 = 1.0
    #   kappa_2 = 0.9999
    # Sample a 100-dim theta with the first two entries non-zero and unit
    # l2-norm.
    unit_circle_angle = np.random.uniform(0, 2 * np.pi)
    true_theta = [np.cos(unit_circle_angle),
                  np.sin(unit_circle_angle)] + [0] * 98
    #   infile = '../data/adult.csv'
    kappa_1 = 0.08
    kappa_2 = 0.1
    #   kappa_1 = 0.12
    #   kappa_2 = 0.1
    # Namespace used as a plain attribute container; see
    # https://stackoverflow.com/questions/16878315/what-is-the-right-way-to-treat-python-argparse-namespace-as-a-dictionary
    args_1 = argparse.Namespace()
    args_1.n_feat = len(true_theta)
    args_1.optim = 'sgd'
    #   args_1.learn_rate = 2e-4
    args_1.learn_rate = 1e-4
    args_1.momentum = 0.9  # only for SGD
    args_1.gamma = 1  # discount factor
    args_1.sample_cost = 0.9  # cost must be >= 0
    #   args_1.sample_cost = 1
    args_1.in_dim = len(true_theta) + 1  # input dim of policy
    args_1.n_act = 2  # num actions

    args_2 = argparse.Namespace()
    args_2.n_feat = len(true_theta)
    args_2.optim = 'sgd'  # alternative: 'adam'
    #   args_2.learn_rate = 2e-4
    args_2.learn_rate = 1e-4
    args_2.momentum = 0.9  # only for SGD
    args_2.gamma = 1  # discount factor
    args_2.sample_cost = 0.9  # cost must be >= 0
    #   args_2.sample_cost = 1
    args_2.in_dim = len(true_theta) + 1  # input dim of policy
    args_2.n_act = 2  # num actions

    agents = collections.OrderedDict([
        ('bbq_k08',
         functools.partial(SelectiveSampleBBQ, args_1.n_feat, kappa_1)),
        ('bbq_k1', functools.partial(SelectiveSampleBBQ, args_1.n_feat,
                                     kappa_2)),
        ('reinf', functools.partial(PolicyGradientREINFORCE, PolicyNN,
                                    args_1)),
        ('ac',
         functools.partial(PolicyGradientActorCritic, PolicyNNActorCritic,
                           args_2))
    ])
    environments = collections.OrderedDict([
        ('env',
         functools.partial(ContextualBanditFunctionalContext, uniform_iid,
                           true_theta, linear_classifier))
        # Alternatives:
        # ('env',
        #  functools.partial(ContextualBanditFunctionalContext, normal_iid,
        #                    true_theta, linear_classifier))
        # ('env', functools.partial(ContextualBanditDataFileContext, infile))
    ])
    experiments = collections.OrderedDict([(name, BaseExperiment)])
    n_steps = 500
    n_seeds = 10000
    config = Config(name, agents, environments, experiments, n_steps, n_seeds)
    return config