Пример #1
0
def main(_):
    """Run repeated contextual-bandit simulations and persist the results.

    The same sampled dataset is replayed ``nb_simulations`` times.  Every
    simulation builds fresh algorithm instances, runs them through
    ``run_contextual_bandit`` and records, per algorithm, the cumulative
    regret curve and the total collected reward.  After the last simulation
    the regrets are handed to ``save_plot`` and the reward dictionary is
    written with ``np.save``.

    Args:
        _: Unused positional argument (presumably the argv list supplied by
            the application runner -- confirm against the entry point).
    """
    # Local import so this block does not rely on a module-level `import os`.
    import os

    # Problem parameters
    num_contexts = 20000
    nb_simulations = 2
    l_sizes = [50, 50]
    plt_dir = "plots/"
    dict_dir = "dicts/"

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = 'adult'

    # Create the output directories before the expensive simulations start,
    # so a missing directory cannot discard the results at the very end.
    # NOTE(review): save_plot is assumed to write into plt_dir -- confirm.
    for out_dir in (plt_dir, dict_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    # Define hyperparameters and algorithms
    HParams = tf.contrib.training.HParams

    def neural_hparams(**overrides):
        """Build HParams from the defaults shared by all network algorithms."""
        params = dict(num_actions=num_actions,
                      context_dim=context_dim,
                      init_scale=0.3,
                      activation=tf.nn.relu,
                      layer_sizes=l_sizes,
                      batch_size=512,
                      activate_decay=True,
                      initial_lr=0.1,
                      max_grad_norm=5.0,
                      show_training=False,
                      freq_summary=1000,
                      buffer_s=-1,
                      initial_pulls=2)
        params.update(overrides)
        return HParams(**params)

    def neural_linear_hparams(**overrides):
        """Build HParams for the neural-linear family on top of the defaults."""
        params = dict(reset_lr=True,
                      lr_decay_rate=0.5,
                      training_freq=1,
                      training_freq_network=50,
                      training_epochs=100,
                      a0=6,
                      b0=6,
                      lambda_prior=0.25)
        params.update(overrides)
        return neural_hparams(**params)

    def finite_memory_hparams(mu_prior_flag, sigma_prior_flag):
        """Build HParams for the finite-memory neural-linear variants."""
        return neural_linear_hparams(batch_size=64,
                                     lambda_prior=1,
                                     mem=num_actions * 100,
                                     mu_prior_flag=mu_prior_flag,
                                     sigma_prior_flag=sigma_prior_flag)

    # Several of the hyperparameter sets below are referenced only by the
    # disabled (commented-out) entries in make_algos; they are kept so those
    # entries can be re-enabled without further edits.
    hparams = HParams(num_actions=num_actions)

    hparams_linear = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             a0=6,
                             b0=6,
                             lambda_prior=0.25,
                             initial_pulls=2)

    hparams_dropout = neural_hparams(optimizer='RMS',
                                     reset_lr=True,
                                     lr_decay_rate=0.5,
                                     training_freq=50,
                                     training_epochs=100,
                                     use_dropout=True,
                                     keep_prob=0.80)

    hparams_bbb = neural_hparams(optimizer='RMS',
                                 use_sigma_exp_transform=True,
                                 cleared_times_trained=10,
                                 initial_training_steps=100,
                                 noise_sigma=0.1,
                                 reset_lr=False,
                                 training_freq=50,
                                 training_epochs=100)

    hparams_nlinear = neural_linear_hparams()

    hparams_nlinear2 = neural_linear_hparams(training_freq=10)

    hparams_nlinear_finite_memory = finite_memory_hparams(
        mu_prior_flag=1, sigma_prior_flag=1)

    hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(
        mu_prior_flag=0, sigma_prior_flag=0)

    hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(
        mu_prior_flag=1, sigma_prior_flag=0)

    hparams_ucb = neural_hparams(reset_lr=True,
                                 lr_decay_rate=0.5,
                                 optimizer='RMS',
                                 training_freq=1,
                                 training_freq_network=50,
                                 training_epochs=100,
                                 lambda_prior=0.25,
                                 delta=0.01,
                                 lamb=0.01,
                                 mu=1,
                                 S=1)

    def make_algos():
        """Return newly constructed instances of every enabled algorithm.

        This is the single place where the algorithm line-up is defined, so
        the names used to allocate the result arrays always match the
        algorithms that are actually run.
        """
        return [
            #UniformSampling('Uniform Sampling', hparams),
            #FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
            PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
            PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
            NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
            #NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
            LinearFullPosteriorSampling('LinFullPost', hparams_linear),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory_noP',
                hparams_nlinear_finite_memory_no_prior),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noSigP', hparams_nlinear_finite_memory_no_sig_prior),
            #NeuralUCBSampling('NeuralUCB', hparams_ucb)
        ]

    # One instantiation up front provides the algorithm names for the result
    # containers: regrets is (simulation, context), rewards is (simulation,).
    algos = make_algos()
    regrets = {a.name: np.zeros((nb_simulations, num_contexts)) for a in algos}
    rewards = {a.name: np.zeros(nb_simulations) for a in algos}
    rewards['opt_reward'] = np.zeros(nb_simulations)

    for k in range(nb_simulations):

        # Fresh instances for every simulation.
        algos = make_algos()

        # Run contextual bandit problem
        t_init = time.time()
        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results

        # Display results
        display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                        data_type)

        # Column j of h_rewards is taken as the rewards of algos[j].
        for j, a in enumerate(algos):
            regrets[a.name][k, :] = np.cumsum(opt_rewards - h_rewards[:, j])
            rewards[a.name][k] = np.sum(h_rewards[:, j])
        rewards['opt_reward'][k] = np.sum(opt_rewards)

    save_plot(algos, regrets, data_type, num_contexts, plt_dir)
    # `rewards` is a dict, so np.save stores it as a pickled object array;
    # reading it back requires np.load(..., allow_pickle=True).
    np.save(os.path.join(dict_dir, 'dict_' + data_type + '.npy'), rewards)
Пример #2
0
def main(_):
  """Compare a suite of bandit algorithms on one sampled dataset.

  Samples ``num_contexts`` contexts of the chosen ``data_type``, builds the
  hyperparameters for each algorithm, runs all of them through
  ``run_contextual_bandit`` once and prints the outcome via
  ``display_results``.

  Args:
    _: Unused positional argument.
  """

  # Problem parameters
  num_contexts = 2000

  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'

  # Create dataset
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
      data_type, num_contexts)

  # Define hyperparameters and algorithms
  make_hparams = tf.contrib.training.HParams

  def bnn_hparams(**extra):
    """Return HParams made of the shared network defaults plus `extra`."""
    settings = dict(num_actions=num_actions,
                    context_dim=context_dim,
                    init_scale=0.3,
                    activation=tf.nn.relu,
                    layer_sizes=[50],  # new list on every call
                    batch_size=512,
                    activate_decay=True,
                    initial_lr=0.1,
                    max_grad_norm=5.0,
                    show_training=False,
                    freq_summary=1000,
                    buffer_s=-1,
                    initial_pulls=2)
    settings.update(extra)
    return make_hparams(**settings)

  # Training schedule shared by the RMS, dropout and parameter-noise setups.
  rms_schedule = dict(optimizer='RMS',
                      reset_lr=True,
                      lr_decay_rate=0.5,
                      training_freq=50,
                      training_epochs=100)

  # Settings shared by the BBB and alpha-divergence setups.
  variational = dict(optimizer='RMS',
                     use_sigma_exp_transform=True,
                     cleared_times_trained=10,
                     initial_training_steps=100,
                     noise_sigma=0.1,
                     reset_lr=False,
                     training_freq=50,
                     training_epochs=100)

  # Settings shared by both neural-linear setups (training_freq differs).
  neural_linear = dict(reset_lr=True,
                       lr_decay_rate=0.5,
                       training_freq_network=50,
                       training_epochs=100,
                       a0=6,
                       b0=6,
                       lambda_prior=0.25)

  hparams = make_hparams(num_actions=num_actions)

  hparams_linear = make_hparams(num_actions=num_actions,
                                context_dim=context_dim,
                                a0=6,
                                b0=6,
                                lambda_prior=0.25,
                                initial_pulls=2)

  hparams_rms = bnn_hparams(p=0.95, q=3, **rms_schedule)

  hparams_dropout = bnn_hparams(use_dropout=True,
                                keep_prob=0.80,
                                **rms_schedule)

  hparams_bbb = bnn_hparams(**variational)

  hparams_nlinear = bnn_hparams(training_freq=1, **neural_linear)

  hparams_nlinear2 = bnn_hparams(training_freq=10, **neural_linear)

  hparams_pnoise = bnn_hparams(noise_std=0.05,
                               eps=0.1,
                               d_samples=300,
                               **rms_schedule)

  hparams_alpha_div = bnn_hparams(alpha=1.0,
                                  k=20,
                                  prior_variance=0.1,
                                  **variational)

  # The GP setup uses a different parameter set, so it is spelled out in full.
  hparams_gp = make_hparams(num_actions=num_actions,
                            num_outputs=num_actions,
                            context_dim=context_dim,
                            reset_lr=False,
                            learn_embeddings=True,
                            max_num_points=1000,
                            show_training=False,
                            freq_summary=1000,
                            batch_size=512,
                            keep_fixed_after_max_obs=True,
                            training_freq=50,
                            initial_pulls=2,
                            training_epochs=100,
                            lr=0.01,
                            buffer_s=-1,
                            initial_lr=0.001,
                            lr_decay_rate=0.0,
                            optimizer='RMS',
                            task_latent_dim=5,
                            activate_decay=False)

  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  # Run contextual bandit problem
  t_init = time.time()
  _, h_rewards = run_contextual_bandit(context_dim, num_actions, dataset,
                                       algos)

  # Display results
  display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                  data_type)
Пример #3
0
def run_iter():
    """Run one pass of the contextual-bandit benchmark and return the regret.

    The dataset name is taken from ``FLAGS.dataset`` and the number of
    contexts from ``num_contexts``, which is read from the enclosing
    (module) scope rather than defined here.  Every algorithm in the list
    below is run on the sampled data, the results are displayed, and the
    difference between the optimal rewards and the obtained rewards is
    returned.

    Returns:
        ``opt_rewards`` reshaped to a single column minus ``h_rewards``.
        NOTE(review): presumably ``h_rewards`` has one column per algorithm,
        so the subtraction broadcasts -- confirm against
        ``run_contextual_bandit``.
    """
    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = FLAGS.dataset

    # Create dataset
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sample_data(
        data_type, num_contexts)

    # Short local alias for the hyperparameter container used throughout.
    new_hparams = tf.contrib.training.HParams

    # Define hyperparameters and algorithms
    hp_uniform = new_hparams(num_actions=num_actions)

    hp_linear = new_hparams(
        num_actions=num_actions,
        context_dim=context_dim,
        a0=6,
        b0=6,
        lambda_prior=0.25,
        initial_pulls=2,
    )

    # Built for parity with the other configurations; no entry in the
    # algorithm list below references it.
    hp_rms = new_hparams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=50,
        training_epochs=100,
        p=0.95,
        q=3,
    )

    hp_bbb = new_hparams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        use_sigma_exp_transform=True,
        cleared_times_trained=10,
        initial_training_steps=100,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100,
    )

    hp_nlinear = new_hparams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=0.25,
    )

    hp_luga = new_hparams(
        num_actions=num_actions,
        num_contexts=num_contexts,
        context_dim=context_dim,
        activation=tf.nn.relu,
        latent_dim=50,
        batch_size=512,
        initial_lr=2e-4,
        show_training=False,
        lr_decay=False,
        freq_summary=10000,
        buffer_s=-1,
        initial_pulls=2,
        training_freq=20,
        training_epochs=40,
        lambda_prior=0.25,
        show_loss=False,
        kl=1.0,
        recon=1.0,
        psigma=1.0,
        glnoise=False,
    )

    hp_sivi = new_hparams(
        num_actions=num_actions,
        num_contexts=num_contexts,
        context_dim=context_dim,
        activation=tf.nn.relu,
        latent_dim=50,
        batch_size=512,
        initial_lr=1e-3,
        show_training=False,
        verbose=False,
        lr_decay=False,
        freq_summary=10000,
        buffer_s=-1,
        initial_pulls=2,
        training_freq=20,
        training_epochs=40,
        lambda_prior=0.25,
        show_loss=False,
        kl=1.0,
        recon=1.0,
        two_decoder=False,
        glnoise=False,
        psigma=1.25,
    )

    hp_sivi_ablation = new_hparams(
        num_actions=num_actions,
        num_contexts=num_contexts,
        context_dim=context_dim,
        activation=tf.nn.relu,
        latent_dim=50,
        batch_size=512,
        initial_lr=1e-3,
        show_training=False,
        verbose=False,
        lr_decay=False,
        freq_summary=10000,
        buffer_s=-1,
        initial_pulls=2,
        training_freq=20,
        training_epochs=40,
        lambda_prior=0.25,
        show_loss=False,
        km=1,
        onez=0,
        recon=1.0,
        two_decoder=False,
        glnoise=False,
        psigma=1.25,
    )

    hp_luga_ablation = new_hparams(
        num_actions=num_actions,
        num_contexts=num_contexts,
        context_dim=context_dim,
        activation=tf.nn.relu,
        latent_dim=50,
        batch_size=512,
        initial_lr=2e-4,
        show_training=False,
        lr_decay=False,
        freq_summary=10000,
        buffer_s=-1,
        initial_pulls=2,
        training_freq=20,
        training_epochs=40,
        lambda_prior=0.25,
        show_loss=False,
        km=1,
        onez=0,
        recon=1.0,
        psigma=1.0,
        glnoise=False,
    )

    policies = [
        # 1-4: baselines.
        UniformSampling('Uniform Sampling', hp_uniform),
        PosteriorBNNSampling('BBB', hp_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hp_nlinear),
        LinearFullPosteriorSampling('LinFullPost', hp_linear),
        # 5: reuses the BBB hyperparameters.
        PiposteriorBNNSampling('DGF', hp_bbb, 'DGF'),
        # 6
        VariationalSampling_v4('LU_Gaussian', hp_luga),
        # 7: a smaller learning rate like 3e-4 or 1e-4 will work better on
        # the 'mushroom' dataset for LU_SIVI and LU_Gaussian.
        VariationalSamplingSivi_dgf_v7("LU_SIVI", hp_sivi),
        # Ablation study variants.
        VariationalSampling_abl('LU_Gaussian_Ablation_multi_z_1m',
                                hp_luga_ablation),
        VariationalSamplingSivi_dgf_abl("LU_SIVI_Ablation_multi_z_1m",
                                        hp_sivi_ablation),
    ]

    # Start the clock right before the run; display_results receives it.
    t_init = time.time()
    _, h_rewards = run_contextual_bandit(
        context_dim, num_actions, dataset, policies)

    display_results(policies, opt_rewards, opt_actions, h_rewards, t_init,
                    data_type)

    # Column-shape the optimal rewards before subtracting the obtained ones.
    return opt_rewards.reshape([-1, 1]) - h_rewards
                                                max_grad_norm=5.0,
                                                show_training=False,
                                                freq_summary=1000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                optimizer=param[2],
                                                reset_lr=True,
                                                lr_decay_rate=0.5,
                                                training_freq=50,
                                                training_epochs=50),
                    'RMSProp'))

    print(len(neural_greedy_protos))

    random_proto = lambda: UniformSampling('Uniform Sampling', hparams)
    linThompson_proto = lambda: LinearFullPosteriorSampling('linThompson', hparams_linear)
    linUCB_proto = lambda: LinUCB('linUCB', hparams_linucb)
    linEps_proto = lambda: LinEpsilon('LinEpsilon', hparams_lineps)

    algo_protos = neural_greedy_protos + [linUCB_proto, linEps_proto, linThompson_proto, random_proto]
    # for algo_proto in algo_protos:
    #     algo = algo_proto()
    #     print(algo.name, algo.hparams)
    # print (algo_protos[0]==algo_protos[1])

    # Run experiments several times save and plot results
    benchmarker = Benchmarker(algo_protos, dataset_proto, num_actions, context_dim, nb_contexts=num_contexts,
                              test_name='NNparams_linear_test1_0_10')

    benchmarker.run_experiments(50)
    benchmarker.save_results('./results/')
Пример #5
0
def main(_):
    """Benchmark a suite of bandit algorithms on the "job_bank" problem.

    Generates ``num_contexts`` synthetic rounds with the
    ``context_bandit_*`` helpers, builds one hyperparameter set per
    algorithm, runs every algorithm through ``run_contextual_bandit`` and
    hands the outcome to ``display_results``.

    Args:
        _: Ignored.  NOTE(review): presumably the argv list supplied by an
            app runner -- confirm against the caller.
    """
    # create dataset
    data_type = "job_bank"
    num_contexts = 2000
    num_actions = 2
    context_dim = 2
    # Each row holds the context features followed by one reward per action.
    # Builtin float/int replace the np.float/np.int aliases, which were
    # deprecated in NumPy 1.20 and removed in 1.24 (same resulting dtypes).
    dataset = np.empty((num_contexts, context_dim + num_actions), dtype=float)
    opt_actions = np.empty(num_contexts, dtype=int)
    opt_rewards = np.empty(num_contexts, dtype=float)
    for row in range(num_contexts):
        ctx = context_bandit_gen_context()
        all_probs = [context_bandit_prob(ctx, a) for a in range(num_actions)]
        optimal = np.argmax(all_probs)
        rewards = [context_bandit_reward(ctx, a) for a in range(num_actions)]
        dataset[row, :] = np.array(ctx.tolist() + rewards)
        opt_actions[row] = optimal
        # The optimal "reward" recorded is the best action's probability,
        # not a sampled reward.
        opt_rewards[row] = all_probs[optimal]

    hparams = HParams(num_actions=num_actions)

    hparams_linear = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             a0=6,
                             b0=6,
                             lambda_prior=0.25,
                             initial_pulls=2)

    hparams_rms = HParams(num_actions=num_actions,
                          context_dim=context_dim,
                          init_scale=0.3,
                          activation=tf.nn.relu,
                          layer_sizes=[50],
                          batch_size=512,
                          activate_decay=True,
                          initial_lr=0.1,
                          max_grad_norm=5.0,
                          show_training=False,
                          freq_summary=1000,
                          buffer_s=-1,
                          initial_pulls=2,
                          optimizer='RMS',
                          reset_lr=True,
                          lr_decay_rate=0.5,
                          training_freq=50,
                          training_epochs=100,
                          p=0.95,
                          q=3,
                          verbose=False)

    hparams_dropout = HParams(num_actions=num_actions,
                              context_dim=context_dim,
                              init_scale=0.3,
                              activation=tf.nn.relu,
                              layer_sizes=[50],
                              batch_size=512,
                              activate_decay=True,
                              initial_lr=0.1,
                              max_grad_norm=5.0,
                              show_training=False,
                              freq_summary=1000,
                              buffer_s=-1,
                              initial_pulls=2,
                              optimizer='RMS',
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=50,
                              training_epochs=100,
                              use_dropout=True,
                              keep_prob=0.80,
                              verbose=False)

    hparams_bbb = HParams(num_actions=num_actions,
                          context_dim=context_dim,
                          init_scale=0.3,
                          activation=tf.nn.relu,
                          layer_sizes=[50],
                          batch_size=512,
                          activate_decay=True,
                          initial_lr=0.1,
                          max_grad_norm=5.0,
                          show_training=False,
                          freq_summary=1000,
                          buffer_s=-1,
                          initial_pulls=2,
                          optimizer='RMS',
                          use_sigma_exp_transform=True,
                          cleared_times_trained=10,
                          initial_training_steps=100,
                          noise_sigma=0.1,
                          reset_lr=False,
                          training_freq=50,
                          training_epochs=100,
                          verbose=False)

    hparams_nlinear = HParams(num_actions=num_actions,
                              context_dim=context_dim,
                              init_scale=0.3,
                              activation=tf.nn.relu,
                              layer_sizes=[50],
                              batch_size=512,
                              activate_decay=True,
                              initial_lr=0.1,
                              max_grad_norm=5.0,
                              show_training=False,
                              freq_summary=1000,
                              buffer_s=-1,
                              initial_pulls=2,
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=1,
                              training_freq_network=50,
                              training_epochs=100,
                              a0=6,
                              b0=6,
                              lambda_prior=0.25,
                              verbose=False)

    # Same as hparams_nlinear except for training_freq (10 instead of 1).
    hparams_nlinear2 = HParams(num_actions=num_actions,
                               context_dim=context_dim,
                               init_scale=0.3,
                               activation=tf.nn.relu,
                               layer_sizes=[50],
                               batch_size=512,
                               activate_decay=True,
                               initial_lr=0.1,
                               max_grad_norm=5.0,
                               show_training=False,
                               freq_summary=1000,
                               buffer_s=-1,
                               initial_pulls=2,
                               reset_lr=True,
                               lr_decay_rate=0.5,
                               training_freq=10,
                               training_freq_network=50,
                               training_epochs=100,
                               a0=6,
                               b0=6,
                               lambda_prior=0.25,
                               verbose=False)

    hparams_pnoise = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             init_scale=0.3,
                             activation=tf.nn.relu,
                             layer_sizes=[50],
                             batch_size=512,
                             activate_decay=True,
                             initial_lr=0.1,
                             max_grad_norm=5.0,
                             show_training=False,
                             freq_summary=1000,
                             buffer_s=-1,
                             initial_pulls=2,
                             optimizer='RMS',
                             reset_lr=True,
                             lr_decay_rate=0.5,
                             training_freq=50,
                             training_epochs=100,
                             noise_std=0.05,
                             eps=0.1,
                             d_samples=300,
                             verbose=False)

    hparams_alpha_div = HParams(num_actions=num_actions,
                                context_dim=context_dim,
                                init_scale=0.3,
                                activation=tf.nn.relu,
                                layer_sizes=[50],
                                batch_size=512,
                                activate_decay=True,
                                initial_lr=0.1,
                                max_grad_norm=5.0,
                                show_training=False,
                                freq_summary=1000,
                                buffer_s=-1,
                                initial_pulls=2,
                                optimizer='RMS',
                                use_sigma_exp_transform=True,
                                cleared_times_trained=10,
                                initial_training_steps=100,
                                noise_sigma=0.1,
                                reset_lr=False,
                                training_freq=50,
                                training_epochs=100,
                                alpha=1.0,
                                k=20,
                                prior_variance=0.1,
                                verbose=False)

    hparams_gp = HParams(num_actions=num_actions,
                         num_outputs=num_actions,
                         context_dim=context_dim,
                         reset_lr=False,
                         learn_embeddings=True,
                         max_num_points=1000,
                         show_training=False,
                         freq_summary=1000,
                         batch_size=512,
                         keep_fixed_after_max_obs=True,
                         training_freq=50,
                         initial_pulls=2,
                         training_epochs=100,
                         lr=0.01,
                         buffer_s=-1,
                         initial_lr=0.001,
                         lr_decay_rate=0.0,
                         optimizer='RMS',
                         task_latent_dim=5,
                         activate_decay=False,
                         verbose=False)

    # The fixed-policy probability vectors have one entry per action
    # (num_actions == 2 above).
    algos = [
        UniformSampling('Uniform Sampling', hparams),
        FixedPolicySampling('Fixed 1', [0.75, 0.25], hparams),
        FixedPolicySampling('Fixed 2', [0.25, 0.75], hparams),
        PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    # This variant of run_contextual_bandit returns three values; the third
    # is passed straight through to display_results.
    _, h_rewards, times = run_contextual_bandit(context_dim, num_actions,
                                                dataset, algos)

    display_results(algos, opt_rewards, opt_actions, h_rewards, times,
                    data_type)
def main(argv):
    opts = get_options()
    print("Parameters: {}".format(opts))
    address = ('localhost', opts.ipc_port)  # family is deduced to be 'AF_INET'
    listener = Listener(address, authkey=b'bandit')
    conn = listener.accept()
    multiprocessing.current_process().authkey = b'bandit'
    print('connection accepted from', listener.last_accepted)


    # Create contextual bandit
    bandit = IPCBandit(conn)

    if opts.algorithm == "uniform":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions)
        policy = UniformSampling('Uniform Sampling', policy_parameters)

    elif opts.algorithm == "linear":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                     context_dim=bandit.context_dim,
                                                     a0=6,
                                                     b0=6,
                                                     lambda_prior=0.25,
                                                     initial_pulls=2)
        policy = LinearFullPosteriorSampling('LinFullPost', policy_parameters)

    elif opts.algorithm == "rms":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  p=0.95,
                                                  q=3)
        policy = PosteriorBNNSampling('RMS', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bootrms":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  p=0.95,
                                                  q=3)
        policy =BootstrappedBNNSampling('BootRMS', policy_parameters)

    elif opts.algorithm == "dropout":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                      context_dim=bandit.context_dim,
                                                      init_scale=0.3,
                                                      activation=tf.nn.relu,
                                                      layer_sizes=[50],
                                                      batch_size=512,
                                                      activate_decay=True,
                                                      initial_lr=0.1,
                                                      max_grad_norm=5.0,
                                                      show_training=False,
                                                      freq_summary=1000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      optimizer='RMS',
                                                      reset_lr=True,
                                                      lr_decay_rate=0.5,
                                                      training_freq=50,
                                                      training_epochs=100,
                                                      use_dropout=True,
                                                      keep_prob=0.80)
        policy = PosteriorBNNSampling('Dropout', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bbb":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  use_sigma_exp_transform=True,
                                                  cleared_times_trained=10,
                                                  initial_training_steps=100,
                                                  noise_sigma=0.1,
                                                  reset_lr=False,
                                                  training_freq=50,
                                                  training_epochs=100)
        policy = PosteriorBNNSampling('BBB', policy_parameters, 'Variational')

    elif opts.algorithm == "neurallinear":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                      context_dim=bandit.context_dim,
                                                      init_scale=0.3,
                                                      activation=tf.nn.relu,
                                                      layer_sizes=[50],
                                                      batch_size=512,
                                                      activate_decay=True,
                                                      initial_lr=0.1,
                                                      max_grad_norm=5.0,
                                                      show_training=False,
                                                      freq_summary=1000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      reset_lr=True,
                                                      lr_decay_rate=0.5,
                                                      training_freq=1,
                                                      training_freq_network=50,
                                                      training_epochs=100,
                                                      a0=6,
                                                      b0=6,
                                                      lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear', policy_parameters)

    elif opts.algorithm == "neurallinear2":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                       context_dim=bandit.context_dim,
                                                       init_scale=0.3,
                                                       activation=tf.nn.relu,
                                                       layer_sizes=[50],
                                                       batch_size=512,
                                                       activate_decay=True,
                                                       initial_lr=0.1,
                                                       max_grad_norm=5.0,
                                                       show_training=False,
                                                       freq_summary=1000,
                                                       buffer_s=-1,
                                                       initial_pulls=2,
                                                       reset_lr=True,
                                                       lr_decay_rate=0.5,
                                                       training_freq=10,
                                                       training_freq_network=50,
                                                       training_epochs=100,
                                                       a0=6,
                                                       b0=6,
                                                       lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear2', policy_parameters)

    elif opts.algorithm == "pnoise":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                     context_dim=bandit.context_dim,
                                                     init_scale=0.3,
                                                     activation=tf.nn.relu,
                                                     layer_sizes=[50],
                                                     batch_size=512,
                                                     activate_decay=True,
                                                     initial_lr=0.1,
                                                     max_grad_norm=5.0,
                                                     show_training=False,
                                                     freq_summary=1000,
                                                     buffer_s=-1,
                                                     initial_pulls=2,
                                                     optimizer='RMS',
                                                     reset_lr=True,
                                                     lr_decay_rate=0.5,
                                                     training_freq=50,
                                                     training_epochs=100,
                                                     noise_std=0.05,
                                                     eps=0.1,
                                                     d_samples=300,
                                                     )
        policy = ParameterNoiseSampling('ParamNoise', policy_parameters)

    elif opts.algorithm == "alpha_div":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        init_scale=0.3,
                                                        activation=tf.nn.relu,
                                                        layer_sizes=[50],
                                                        batch_size=512,
                                                        activate_decay=True,
                                                        initial_lr=0.1,
                                                        max_grad_norm=5.0,
                                                        show_training=False,
                                                        freq_summary=1000,
                                                        buffer_s=-1,
                                                        initial_pulls=2,
                                                        optimizer='RMS',
                                                        use_sigma_exp_transform=True,
                                                        cleared_times_trained=10,
                                                        initial_training_steps=100,
                                                        noise_sigma=0.1,
                                                        reset_lr=False,
                                                        training_freq=50,
                                                        training_epochs=100,
                                                        alpha=1.0,
                                                        k=20,
                                                        prior_variance=0.1)
        policy = PosteriorBNNSampling('BBAlphaDiv', policy_parameters, 'AlphaDiv')

    elif opts.algorithm == "gp":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        num_outputs=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        reset_lr=False,
                                                        learn_embeddings=True,
                                                        max_num_points=1000,
                                                        show_training=False,
                                                        freq_summary=1000,
                                                        batch_size=512,
                                                        keep_fixed_after_max_obs=True,
                                                        training_freq=50,
                                                        initial_pulls=2,
                                                        training_epochs=100,
                                                        lr=0.01,
                                                        buffer_s=-1,
                                                        initial_lr=0.001,
                                                        lr_decay_rate=0.0,
                                                        optimizer='RMS',
                                                        task_latent_dim=5,
                                                        activate_decay=False)
        policy = PosteriorBNNSampling('MultitaskGP', policy_parameters, 'GP')

    else:
        raise Exception("Misspecified bandit algorithm.")

    print(policy)
    # Run the contextual bandit process
    while True:
        context = bandit.context()
        if context is None:
            break
        action = policy.action(context)
        reward = bandit.pull(action)
        if reward is None:
            break

        policy.update(context, action, reward)

    conn.close()
    listener.close()
Пример #7
0
def main(_):
  """Run a repeated contextual-bandit benchmark on the 'financial' dataset.

  One dataset is sampled up front. Then, for each of `Nruns` repetitions, a
  fresh list of agents is built, run through `run_contextual_bandit`, the
  per-run results are displayed, and every agent's summed reward is appended
  to its slot in `res`. After the last repetition the accumulated totals are
  passed to `display_final_results`.

  Args:
    _: Unused positional argument (presumably the argv handed over by
      `tf.app.run` -- TODO confirm against the caller).
  """

  # Problem parameters
  num_contexts = 3500  # an earlier value of 2000 was left as a comment
  tfn = 200  # passed as `training_freq_network` to all NeuralLinear variants
  MEMSIZE = 700  # passed as `mem` to the finite-memory variants
  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'financial'

  # Create dataset
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  # --- Hyperparameter builders -------------------------------------------
  # Every network-based agent shares the same 13 base settings. The helpers
  # below build them in one place so that each variant only spells out what
  # actually differs. A new dict (and a new `layer_sizes` list) is created on
  # every call, so no mutable value is shared between HParams objects.

  def net_hparams(**overrides):
    """Return HParams with the shared network settings plus `overrides`."""
    settings = dict(num_actions=num_actions,
                    context_dim=context_dim,
                    init_scale=0.3,
                    activation=tf.nn.relu,
                    layer_sizes=[50],
                    batch_size=512,
                    activate_decay=True,
                    initial_lr=0.1,
                    max_grad_norm=5.0,
                    show_training=False,
                    freq_summary=1000,
                    buffer_s=-1,
                    initial_pulls=2)
    settings.update(overrides)
    return tf.contrib.training.HParams(**settings)

  def rms_hparams(**extra):
    """Settings shared by the RMS, Dropout and ParamNoise configurations."""
    return net_hparams(optimizer='RMS',
                       reset_lr=True,
                       lr_decay_rate=0.5,
                       training_freq=50,
                       training_epochs=100,
                       **extra)

  def variational_hparams(**extra):
    """Settings shared by the BBB and BB-alpha-divergence configurations."""
    return net_hparams(optimizer='RMS',
                       use_sigma_exp_transform=True,
                       cleared_times_trained=10,
                       initial_training_steps=100,
                       noise_sigma=0.1,
                       reset_lr=False,
                       training_freq=50,
                       training_epochs=100,
                       **extra)

  def nlinear_hparams(training_freq, **overrides):
    """Settings for a NeuralLinear agent with the given `training_freq`."""
    settings = dict(reset_lr=True,
                    lr_decay_rate=0.5,
                    training_freq=training_freq,
                    training_freq_network=tfn,
                    training_epochs=100,
                    a0=6,
                    b0=6,
                    lambda_prior=0.25)
    # `overrides` may replace a default (the finite-memory variants replace
    # `lambda_prior`), so merge instead of passing both as keywords.
    settings.update(overrides)
    return net_hparams(**settings)

  def finite_memory_hparams(training_freq, mu_prior_flag, sigma_prior_flag):
    """Settings for a finite-memory NeuralLinear agent."""
    return nlinear_hparams(training_freq,
                           lambda_prior=1,
                           mem=MEMSIZE,
                           mu_prior_flag=mu_prior_flag,
                           sigma_prior_flag=sigma_prior_flag)

  # --- Define hyperparameters --------------------------------------------
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               a0=6,
                                               b0=6,
                                               lambda_prior=0.25,
                                               initial_pulls=2)

  # The next six configurations are only referenced by agents that are
  # currently commented out in `algos`; they are kept so that those agents
  # can be re-enabled without further changes.
  hparams_rms = rms_hparams(p=0.95, q=3)
  hparams_dropout = rms_hparams(use_dropout=True, keep_prob=0.80)
  hparams_bbb = variational_hparams()
  hparams_pnoise = rms_hparams(noise_std=0.05, eps=0.1, d_samples=300)
  hparams_alpha_div = variational_hparams(alpha=1.0, k=20, prior_variance=0.1)
  hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                           num_outputs=num_actions,
                                           context_dim=context_dim,
                                           reset_lr=False,
                                           learn_embeddings=True,
                                           max_num_points=1000,
                                           show_training=False,
                                           freq_summary=1000,
                                           batch_size=512,
                                           keep_fixed_after_max_obs=True,
                                           training_freq=50,
                                           initial_pulls=2,
                                           training_epochs=100,
                                           lr=0.01,
                                           buffer_s=-1,
                                           initial_lr=0.001,
                                           lr_decay_rate=0.0,
                                           optimizer='RMS',
                                           task_latent_dim=5,
                                           activate_decay=False)

  # NeuralLinear: the plain variant and the "2" variant differ only in
  # training_freq (1 vs 10).
  hparams_nlinear = nlinear_hparams(1)
  hparams_nlinear2 = nlinear_hparams(10)

  # Finite-memory NeuralLinear: {training_freq 1, 10} x three prior-flag
  # combinations (mu_prior_flag, sigma_prior_flag).
  hparams_nlinear_finite_memory = finite_memory_hparams(1, 1, 1)
  hparams_nlinear_finite_memory2 = finite_memory_hparams(10, 1, 1)
  hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(1, 0, 0)
  hparams_nlinear_finite_memory2_no_prior = finite_memory_hparams(10, 0, 0)
  hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(1, 1, 0)
  hparams_nlinear_finite_memory2_no_sig_prior = finite_memory_hparams(10, 1, 0)

  # --- Repeated runs ------------------------------------------------------
  Nruns = 50
  par = 0  # 0 selects the sequential path below
  # `res` holds one list of per-run reward totals per agent, so NAgents must
  # be at least the number of active entries in `algos`.
  NAgents = 10
  res = [[] for _ in range(NAgents)]

  for i in range(Nruns):
    print(i)
    # Agents are rebuilt on every repetition so each run starts fresh.
    algos = [
        UniformSampling('Uniform Sampling', hparams),
        # UniformSampling('Uniform Sampling 2', hparams),
        # FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
        # FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
        # PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        # PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        # PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory2', hparams_nlinear_finite_memory2),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory_noP',
            hparams_nlinear_finite_memory_no_prior),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory2_noP',
            hparams_nlinear_finite_memory2_no_prior),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory_noSigP',
            hparams_nlinear_finite_memory_no_sig_prior),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory2_noSigP',
            hparams_nlinear_finite_memory2_no_sig_prior),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        # BootstrappedBNNSampling('BootRMS', hparams_rms),
        # ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        # PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        # PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    if par == 0:
      # Run contextual bandit problem
      t_init = time.time()
      results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
      _, h_rewards = results

      # Display results
      display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                      data_type)

      # Append results: column j of h_rewards is summed into agent j's list.
      for j, _agent in enumerate(algos):
        res[j].append(np.sum(h_rewards[:, j]))
    else:
      # NOTE(review): unreachable while `par` is hard-coded to 0. As written
      # it submits Nruns jobs on every outer iteration and indexes `res`
      # (NAgents lists) with the job index `j`, which looks unintended --
      # verify against what `Run` returns before enabling this path.
      par_res = Parallel(n_jobs=num_cores)(
          delayed(Run)(context_dim, num_actions, dataset, algos, opt_rewards,
                       opt_actions, data_type)
          for _run in range(Nruns))
      for j, rr in enumerate(par_res):
        res[j].append(rr[j])

    # Drop the reference to the agents on all but the last three repetitions
    # (presumably to release memory early); the list built on the final
    # repetition survives and is passed to display_final_results below.
    if i < (Nruns - 3):
      algos = None

  display_final_results(algos, opt_rewards, res, data_type)
Пример #8
0
def main(argv):
    """Benchmark NeuralLinear bandit variants against LinFullPost on 'statlog'.

    One dataset of ``num_contexts`` contexts is sampled and then replayed
    ``Nruns`` times. Every run builds a fresh set of algorithm objects, feeds
    them to ``run_contextual_bandit`` and prints the summed reward of each
    algorithm. The per-run sums are finally passed to
    ``display_final_results`` together with the algorithm objects of the
    last run.

    Args:
        argv: Unused.
    """
    # Problem parameters
    num_contexts = 4000
    tfn = 400  # forwarded as training_freq_network
    tfe = tfn * 2  # forwarded as training_epochs
    data_type = 'statlog'
    l_sizes = [50]
    outdir = "./"
    Nruns = 10

    # Create dataset. The sixth element of the tuple is not needed here.
    sampled_vals = sample_data(data_type, num_contexts)
    (dataset, opt_rewards, opt_actions, num_actions, context_dim,
     _vocab_processor) = sampled_vals

    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Define hyperparameters and algorithms
    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    # Settings shared by every NeuralLinear variant; the variants only differ
    # in lambda_prior, the memory size and the two prior flags.
    nlinear_common = dict(num_actions=num_actions,
                          context_dim=context_dim,
                          init_scale=0.3,
                          activation=tf.nn.relu,
                          layer_sizes=l_sizes,
                          batch_size=64,
                          activate_decay=True,
                          initial_lr=0.1,
                          max_grad_norm=5.0,
                          show_training=False,
                          freq_summary=1000,
                          buffer_s=-1,
                          initial_pulls=2,
                          reset_lr=True,
                          lr_decay_rate=0.5,
                          training_freq=1,
                          training_freq_network=tfn,
                          training_epochs=tfe,
                          a0=6,
                          b0=6)

    def finite_memory_hparams(mu_prior_flag, sigma_prior_flag):
        # Finite-memory configuration for the given pair of prior flags.
        return tf.contrib.training.HParams(lambda_prior=1,
                                           mem=num_actions * 100,
                                           mu_prior_flag=mu_prior_flag,
                                           sigma_prior_flag=sigma_prior_flag,
                                           **nlinear_common)

    hparams_nlinear = tf.contrib.training.HParams(lambda_prior=0.25,
                                                  **nlinear_common)
    hparams_nlinear_finite_memory = finite_memory_hparams(1, 1)
    hparams_nlinear_finite_memory_no_prior = finite_memory_hparams(0, 0)
    hparams_nlinear_finite_memory_no_sig_prior = finite_memory_hparams(1, 0)

    # (class, name, hparams) for every competitor. The list is the single
    # source of truth for how many algorithms take part, so the result
    # containers below can never get out of sync with it.
    algo_specs = [
        (NeuralLinearPosteriorSampling, 'NeuralLinear', hparams_nlinear),
        (NeuralLinearPosteriorSamplingFiniteMemory,
         'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
        (NeuralLinearPosteriorSamplingFiniteMemory,
         'NeuralLinearFiniteMemory_noP',
         hparams_nlinear_finite_memory_no_prior),
        (NeuralLinearPosteriorSamplingFiniteMemory,
         'NeuralLinearFiniteMemory_noSigP',
         hparams_nlinear_finite_memory_no_sig_prior),
        (LinearFullPosteriorSampling, 'LinFullPost', hparams_linear),
    ]

    # rewards[j] collects one summed reward per run for algorithm j.
    rewards = [[] for _ in algo_specs]
    algos = None
    for i_run in range(Nruns):
        # Fresh algorithm objects for every run.
        algos = [algo_cls(name, algo_hparams)
                 for algo_cls, name, algo_hparams in algo_specs]
        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results
        for j, _ in enumerate(algos):
            run_total = np.sum(h_rewards[:, j])
            print(run_total)
            rewards[j].append(run_total)
        if i_run < (Nruns - 1):
            # Drop the references so this run's algorithm objects can be
            # reclaimed before the next set is built; the last set is kept
            # because display_final_results receives it.
            algos = None
    display_final_results(algos, opt_rewards, opt_actions, rewards, data_type)
Пример #9
0
def main(_):
    """Run the bandit comparison ``nExperiment`` times on the 'moon' data.

    One dataset is sampled up front and reused for every repetition. In each
    repetition a fresh LinFullPost / NeuralGreedy pair is built and run
    through ``run_contextual_bandit``; columns 0 and 1 of the returned reward
    history are written to ``resultLin<i>.csv`` and ``resultMoon<i>.csv``
    (relative paths) and ``display_results`` is called.

    Most hyperparameter sets defined here are referenced only by
    commented-out entries of the algorithm list.

    The single positional argument is ignored.
    """

    # Problem parameters
    num_contexts = 40000

    # parameters of finite
    # NOTE(review): tfn, tfe and outdir are not referenced anywhere in the
    # visible part of this function.
    tfn = 400
    tfe = tfn * 2
    # NOTE(review): this value is never read; data_type is reassigned to
    # 'moon' a few lines below, before its first use.
    data_type = 'statlog'
    l_sizes = [50, 50]
    outdir = "./"

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = 'moon'
    # Number of repetitions of the experiment loop at the end of the function.
    nExperiment = 2
    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    # Define hyperparameters and algorithms
    # Referenced only by the commented-out Uniform/FixedPolicy entries below.
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    # Used by the active LinFullPost entry.
    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    # Referenced only by the commented-out 'RMS' and 'BootRMS' entries.
    hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              training_freq=50,
                                              training_epochs=100,
                                              p=0.95,
                                              q=3)

    # Referenced only by the commented-out 'Dropout' entry.
    hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50, 50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  use_dropout=True,
                                                  keep_prob=0.80)

    # Referenced only by the commented-out 'BBB' entry.
    hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50, 50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              use_sigma_exp_transform=True,
                                              cleared_times_trained=10,
                                              initial_training_steps=100,
                                              noise_sigma=0.1,
                                              reset_lr=False,
                                              training_freq=50,
                                              training_epochs=100)

    # Referenced only by the commented-out 'NeuralLinear' entry.
    hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50, 50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=1,
                                                  training_freq_network=50,
                                                  training_epochs=100,
                                                  a0=6,
                                                  b0=6,
                                                  lambda_prior=0.25)

    # Referenced only by the commented-out 'NeuralLinear2' entry; differs from
    # hparams_nlinear in layer_sizes ([50]) and training_freq (10).
    hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                                   context_dim=context_dim,
                                                   init_scale=0.3,
                                                   activation=tf.nn.relu,
                                                   layer_sizes=[50],
                                                   batch_size=512,
                                                   activate_decay=True,
                                                   initial_lr=0.1,
                                                   max_grad_norm=5.0,
                                                   show_training=False,
                                                   freq_summary=1000,
                                                   buffer_s=-1,
                                                   initial_pulls=2,
                                                   reset_lr=True,
                                                   lr_decay_rate=0.5,
                                                   training_freq=10,
                                                   training_freq_network=50,
                                                   training_epochs=100,
                                                   a0=6,
                                                   b0=6,
                                                   lambda_prior=0.25)

    # The three finite-memory sets below are identical except for
    # mu_prior_flag / sigma_prior_flag; all are referenced only by
    # commented-out entries of the algorithm list.
    hparams_nlinear_finite_memory = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=1)

    hparams_nlinear_finite_memory_no_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=0,
        sigma_prior_flag=0)

    hparams_nlinear_finite_memory_no_sig_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=0)

    # Referenced only by the commented-out 'ParamNoise' entry.
    hparams_pnoise = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=50,
        training_epochs=100,
        noise_std=0.05,
        eps=0.1,
        d_samples=300,
    )

    # Referenced only by the commented-out 'BBAlphaDiv' entry.
    hparams_alpha_div = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=[50],
        batch_size=512,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='RMS',
        use_sigma_exp_transform=True,
        cleared_times_trained=10,
        initial_training_steps=100,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100,
        alpha=1.0,
        k=20,
        prior_variance=0.1)

    # Referenced only by the commented-out 'MultitaskGP' entry.
    hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                             num_outputs=num_actions,
                                             context_dim=context_dim,
                                             reset_lr=False,
                                             learn_embeddings=True,
                                             max_num_points=1000,
                                             show_training=False,
                                             freq_summary=1000,
                                             batch_size=512,
                                             keep_fixed_after_max_obs=True,
                                             training_freq=50,
                                             initial_pulls=2,
                                             training_epochs=100,
                                             lr=0.01,
                                             buffer_s=-1,
                                             initial_lr=0.001,
                                             lr_decay_rate=0.0,
                                             optimizer='RMS',
                                             task_latent_dim=5,
                                             activate_decay=False)
    # Used by the active NeuralGreedy entry.
    hparams_greedy = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 init_scale=0.3,
                                                 activation=tf.nn.relu,
                                                 layer_sizes=[50],
                                                 batch_size=512,
                                                 activate_decay=True,
                                                 initial_lr=0.1,
                                                 max_grad_norm=5.0,
                                                 show_training=False,
                                                 freq_summary=1000,
                                                 buffer_s=-1,
                                                 initial_pulls=2,
                                                 reset_lr=True,
                                                 lr_decay_rate=0.5,
                                                 optimizer='RMS',
                                                 training_freq=50,
                                                 training_freq_network=50,
                                                 training_epochs=100,
                                                 lambda_prior=0.25,
                                                 delta=0.01,
                                                 lamb=0.01,
                                                 mu=1,
                                                 S=1)
    # Same values as hparams_greedy; referenced only by the commented-out
    # 'NeuralUCB' entry.
    hparams_ucb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              optimizer='RMS',
                                              training_freq=50,
                                              training_freq_network=50,
                                              training_epochs=100,
                                              lambda_prior=0.25,
                                              delta=0.01,
                                              lamb=0.01,
                                              mu=1,
                                              S=1)

    # Run contextual bandit problem
    # The start time is taken once, before the loop, so the same t_init is
    # passed to display_results in every repetition.
    t_init = time.time()
    for i in range(nExperiment):

        # Algorithm objects are rebuilt for every repetition. Only two
        # entries are active: LinFullPost (index 0) and NeuralGreedy (index 1).
        algos = [
            #UniformSampling('Uniform Sampling', hparams),
            #UniformSampling('Uniform Sampling 2', hparams),
            #FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
            #FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
            #PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
            #PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
            #PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
            #NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
            #NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
            LinearFullPosteriorSampling('LinFullPost', hparams_linear),
            #BootstrappedBNNSampling('BootRMS', hparams_rms),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noP', hparams_nlinear_finite_memory_no_prior),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noSigP', hparams_nlinear_finite_memory_no_sig_prior)
            #ParameterNoiseSampling('ParamNoise', hparams_pnoise),
            #PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
            #PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),hparams_ucb
            #NeuralUCBSampling('NeuralUCB', hparams_ucb)
            NeuralGreedy('NeuralGreedy', hparams_greedy)
        ]

        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results
        # Dump one reward column per CSV file, suffixed with the repetition
        # index. The hard-coded indices 0 and 1 presumably follow the order
        # of `algos` (0 = LinFullPost, 1 = NeuralGreedy) -- TODO confirm
        # against run_contextual_bandit; they must be revisited whenever the
        # list of active algorithms changes.
        np.savetxt("resultLin" + str(i) + ".csv",
                   h_rewards[:, 0],
                   delimiter=',')
        np.savetxt("resultMoon" + str(i) + ".csv",
                   h_rewards[:, 1],
                   delimiter=',')
        # Display results
        display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                        data_type)