Example #1
def main(_):

  # Problem parameters
  num_contexts = 2000

  # Data type in {linear, sparse_linear, mushroom, financial, jester,
  #                 statlog, adult, covertype, census, wheel}
  data_type = 'mushroom'

  # Create dataset
  sampled_vals = sample_data(data_type, num_contexts)
  dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

  # Define hyperparameters and algorithms
  hparams = tf.contrib.training.HParams(num_actions=num_actions)

  hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               a0=6,
                                               b0=6,
                                               lambda_prior=0.25,
                                               initial_pulls=2)

  hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=[50],
                                            batch_size=512,
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            optimizer='RMS',
                                            reset_lr=True,
                                            lr_decay_rate=0.5,
                                            training_freq=50,
                                            training_epochs=100,
                                            p=0.95,
                                            q=3)

  hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                context_dim=context_dim,
                                                init_scale=0.3,
                                                activation=tf.nn.relu,
                                                layer_sizes=[50],
                                                batch_size=512,
                                                activate_decay=True,
                                                initial_lr=0.1,
                                                max_grad_norm=5.0,
                                                show_training=False,
                                                freq_summary=1000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                optimizer='RMS',
                                                reset_lr=True,
                                                lr_decay_rate=0.5,
                                                training_freq=50,
                                                training_epochs=100,
                                                use_dropout=True,
                                                keep_prob=0.80)

  hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=[50],
                                            batch_size=512,
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            optimizer='RMS',
                                            use_sigma_exp_transform=True,
                                            cleared_times_trained=10,
                                            initial_training_steps=100,
                                            noise_sigma=0.1,
                                            reset_lr=False,
                                            training_freq=50,
                                            training_epochs=100)

  hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                context_dim=context_dim,
                                                init_scale=0.3,
                                                activation=tf.nn.relu,
                                                layer_sizes=[50],
                                                batch_size=512,
                                                activate_decay=True,
                                                initial_lr=0.1,
                                                max_grad_norm=5.0,
                                                show_training=False,
                                                freq_summary=1000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                reset_lr=True,
                                                lr_decay_rate=0.5,
                                                training_freq=1,
                                                training_freq_network=50,
                                                training_epochs=100,
                                                a0=6,
                                                b0=6,
                                                lambda_prior=0.25)

  hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 init_scale=0.3,
                                                 activation=tf.nn.relu,
                                                 layer_sizes=[50],
                                                 batch_size=512,
                                                 activate_decay=True,
                                                 initial_lr=0.1,
                                                 max_grad_norm=5.0,
                                                 show_training=False,
                                                 freq_summary=1000,
                                                 buffer_s=-1,
                                                 initial_pulls=2,
                                                 reset_lr=True,
                                                 lr_decay_rate=0.5,
                                                 training_freq=10,
                                                 training_freq_network=50,
                                                 training_epochs=100,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25)

  hparams_pnoise = tf.contrib.training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               layer_sizes=[50],
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               optimizer='RMS',
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=50,
                                               training_epochs=100,
                                               noise_std=0.05,
                                               eps=0.1,
                                               d_samples=300)

  hparams_alpha_div = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  use_sigma_exp_transform=True,
                                                  cleared_times_trained=10,
                                                  initial_training_steps=100,
                                                  noise_sigma=0.1,
                                                  reset_lr=False,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  alpha=1.0,
                                                  k=20,
                                                  prior_variance=0.1)

  hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                           num_outputs=num_actions,
                                           context_dim=context_dim,
                                           reset_lr=False,
                                           learn_embeddings=True,
                                           max_num_points=1000,
                                           show_training=False,
                                           freq_summary=1000,
                                           batch_size=512,
                                           keep_fixed_after_max_obs=True,
                                           training_freq=50,
                                           initial_pulls=2,
                                           training_epochs=100,
                                           lr=0.01,
                                           buffer_s=-1,
                                           initial_lr=0.001,
                                           lr_decay_rate=0.0,
                                           optimizer='RMS',
                                           task_latent_dim=5,
                                           activate_decay=False)

  algos = [
      UniformSampling('Uniform Sampling', hparams),
      UniformSampling('Uniform Sampling 2', hparams),
      FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
      FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
      PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
      PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
      PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
      NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
      NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
      LinearFullPosteriorSampling('LinFullPost', hparams_linear),
      BootstrappedBNNSampling('BootRMS', hparams_rms),
      ParameterNoiseSampling('ParamNoise', hparams_pnoise),
      PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
      PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
  ]

  # Run contextual bandit problem
  t_init = time.time()
  results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
  _, h_rewards = results

  # Display results
  display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)
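
These main(_) functions follow the TF1 entry-point convention used by the deep contextual bandits example scripts; a minimal sketch of the surrounding boilerplate (the import list is an assumption, and helpers such as sample_data, run_contextual_bandit, display_results and the sampling algorithms come from the bandits library):

import time

import tensorflow as tf

if __name__ == '__main__':
    tf.app.run()  # parses command-line flags, then calls main(argv)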
Example #2
def main(_):

    # Problem parameters
    num_contexts = 20000
    nb_simulations = 2
    l_sizes = [50, 50]
    plt_dir = "plots/"
    dict_dir = "dicts/"

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = 'adult'

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    # Define hyperparameters and algorithms
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=l_sizes,
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  use_dropout=True,
                                                  keep_prob=0.80)

    hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=l_sizes,
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              use_sigma_exp_transform=True,
                                              cleared_times_trained=10,
                                              initial_training_steps=100,
                                              noise_sigma=0.1,
                                              reset_lr=False,
                                              training_freq=50,
                                              training_epochs=100)

    hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=l_sizes,
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=1,
                                                  training_freq_network=50,
                                                  training_epochs=100,
                                                  a0=6,
                                                  b0=6,
                                                  lambda_prior=0.25)

    hparams_nlinear2 = tf.contrib.training.HParams(num_actions=num_actions,
                                                   context_dim=context_dim,
                                                   init_scale=0.3,
                                                   activation=tf.nn.relu,
                                                   layer_sizes=l_sizes,
                                                   batch_size=512,
                                                   activate_decay=True,
                                                   initial_lr=0.1,
                                                   max_grad_norm=5.0,
                                                   show_training=False,
                                                   freq_summary=1000,
                                                   buffer_s=-1,
                                                   initial_pulls=2,
                                                   reset_lr=True,
                                                   lr_decay_rate=0.5,
                                                   training_freq=10,
                                                   training_freq_network=50,
                                                   training_epochs=100,
                                                   a0=6,
                                                   b0=6,
                                                   lambda_prior=0.25)

    hparams_nlinear_finite_memory = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=1)

    hparams_nlinear_finite_memory_no_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=0,
        sigma_prior_flag=0)

    hparams_nlinear_finite_memory_no_sig_prior = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        init_scale=0.3,
        activation=tf.nn.relu,
        layer_sizes=l_sizes,
        batch_size=64,
        activate_decay=True,
        initial_lr=0.1,
        max_grad_norm=5.0,
        show_training=False,
        freq_summary=1000,
        buffer_s=-1,
        initial_pulls=2,
        reset_lr=True,
        lr_decay_rate=0.5,
        training_freq=1,
        training_freq_network=50,
        training_epochs=100,
        a0=6,
        b0=6,
        lambda_prior=1,
        mem=num_actions * 100,
        mu_prior_flag=1,
        sigma_prior_flag=0)

    hparams_ucb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=l_sizes,
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              optimizer='RMS',
                                              training_freq=1,
                                              training_freq_network=50,
                                              training_epochs=100,
                                              lambda_prior=0.25,
                                              delta=0.01,
                                              lamb=0.01,
                                              mu=1,
                                              S=1)

    algos = [
        #UniformSampling('Uniform Sampling', hparams),
        #FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
        PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        #NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
        NeuralLinearPosteriorSamplingFiniteMemory(
            'NeuralLinearFiniteMemory_noP',
            hparams_nlinear_finite_memory_no_prior),
        #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noSigP', hparams_nlinear_finite_memory_no_sig_prior),
        #NeuralUCBSampling('NeuralUCB', hparams_ucb)
    ]

    regrets = {}
    rewards = {}
    for a in algos:
        regrets[a.name] = np.zeros((nb_simulations, num_contexts))
        rewards[a.name] = np.zeros(nb_simulations)
    rewards['opt_reward'] = np.zeros(nb_simulations)

    for k in range(nb_simulations):
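        # Rebuild the algorithms from scratch for every simulation so each run
        # starts with untrained models; the identical list defined before the
        # loop is only used to initialize the bookkeeping dictionaries by name.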

        algos = [
            #UniformSampling('Uniform Sampling', hparams),
            #FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
            PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
            PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
            NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
            #NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
            LinearFullPosteriorSampling('LinFullPost', hparams_linear),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory', hparams_nlinear_finite_memory),
            NeuralLinearPosteriorSamplingFiniteMemory(
                'NeuralLinearFiniteMemory_noP',
                hparams_nlinear_finite_memory_no_prior),
            #NeuralLinearPosteriorSamplingFiniteMemory('NeuralLinearFiniteMemory_noSigP', hparams_nlinear_finite_memory_no_sig_prior),
            #NeuralUCBSampling('NeuralUCB', hparams_ucb)
        ]

        # Run contextual bandit problem
        t_init = time.time()
        results = run_contextual_bandit(context_dim, num_actions, dataset,
                                        algos)
        _, h_rewards = results

        # Display results
        display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                        data_type)

        for j, a in enumerate(algos):
            regrets[a.name][k, :] = np.cumsum(opt_rewards - h_rewards[:, j])
            rewards[a.name][k] = np.sum(h_rewards[:, j])
        rewards['opt_reward'][k] = np.sum(opt_rewards)

    save_plot(algos, regrets, data_type, num_contexts, plt_dir)
    np.save(dict_dir + 'dict_' + data_type + '.npy', rewards)
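
As a follow-on sketch (not part of the original script), the per-simulation regret curves accumulated above can be summarized before plotting; regrets[name] has shape (nb_simulations, num_contexts) and its last column is the final cumulative regret:

# Report mean +/- std of the final cumulative regret across simulations.
for name, curves in regrets.items():
    print('%s: %.1f +/- %.1f' % (name, curves[:, -1].mean(), curves[:, -1].std()))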
Example #3
def main(_):

    np.random.seed(FLAGS.seed)
    tf.set_random_seed(FLAGS.seed)

    dt = datetime.datetime.now()
    timestr = '{}-{}-{}-{}'.format(dt.month, dt.day, dt.hour, dt.minute)
    FLAGS.logdir = os.path.join(FLAGS.logdir, timestr)

    # Problem parameters
    num_contexts = FLAGS.num_context

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = FLAGS.bandit

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    layer_sizes = [int(i) for i in FLAGS.layers.split(',')]

    # Define hyperparameters and algorithms
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=layer_sizes,
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=2000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              training_freq=50,
                                              training_epochs=100,
                                              p=0.95,
                                              q=20)

    hparams_dropout = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=layer_sizes,
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=2000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  use_dropout=True,
                                                  keep_prob=0.80)

    hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=layer_sizes,
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=True,
                                              freq_summary=2000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              use_sigma_exp_transform=True,
                                              cleared_times_trained=20,
                                              initial_training_steps=2000,
                                              noise_sigma=0.1,
                                              reset_lr=False,
                                              training_freq=50,
                                              training_epochs=100)

    hparams_gp = tf.contrib.training.HParams(num_actions=num_actions,
                                             num_outputs=num_actions,
                                             context_dim=context_dim,
                                             reset_lr=False,
                                             learn_embeddings=True,
                                             max_num_points=1000,
                                             show_training=False,
                                             freq_summary=2000,
                                             batch_size=512,
                                             keep_fixed_after_max_obs=True,
                                             training_freq=50,
                                             initial_pulls=2,
                                             training_epochs=100,
                                             lr=0.01,
                                             buffer_s=-1,
                                             initial_lr=0.001,
                                             lr_decay_rate=0.0,
                                             optimizer='RMS',
                                             task_latent_dim=5,
                                             activate_decay=False)

    hparams_fsvgd = tf.contrib.training.HParams(
        num_actions=num_actions,
        context_dim=context_dim,
        #   activation=tf.nn.relu,
        layer_sizes=layer_sizes,
        batch_size=512,
        activate_decay=False,
        initial_lr=0.1,
        lr=FLAGS.lr,
        n_mm_sample=4,
        mm_n_particles=40,
        mm_jitter=FLAGS.mm_jitter,
        #   max_grad_norm=5.0,
        show_training=True,
        freq_summary=2000,
        buffer_s=-1,
        initial_pulls=2,
        optimizer='Adam',
        use_sigma_exp_transform=True,
        cleared_times_trained=20,
        initial_training_steps=2000,
        noise_sigma=0.1,
        reset_lr=False,
        training_freq=50,
        training_epochs=100,
        n_particles=20,
        interp_batch_size=FLAGS.interp_batch_size,
        prior_variance=FLAGS.prior_variance)

    algos = [
        UniformSampling('Uniform Sampling', hparams),
        #     UniformSampling('Uniform Sampling 2', hparams),
        #     FixedPolicySampling('fixed1', [0.75, 0.25], hparams),
        #     FixedPolicySampling('fixed2', [0.25, 0.75], hparams),
        PosteriorBNNSampling('fSVGD', hparams_fsvgd, 'SVGD'),
        #     PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        #     PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        #     NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        #     NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        #     LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        #     ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        #     PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    # Run contextual bandit problem
    t_init = time.time()
    results = run_contextual_bandit(context_dim, num_actions, dataset, algos,
                                    opt_rewards)
    _, h_rewards = results

    # Display results
    display_results(algos, opt_rewards, opt_actions, h_rewards, t_init,
                    data_type)
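
Example #3 reads several command-line flags; a minimal sketch of how they could be declared with the TF1 flags API (all defaults here are assumptions):

flags = tf.app.flags
flags.DEFINE_integer('seed', 0, 'Random seed for numpy and TensorFlow.')
flags.DEFINE_string('logdir', '/tmp/bandits/', 'Base directory for logs.')
flags.DEFINE_integer('num_context', 2000, 'Number of contexts to sample.')
flags.DEFINE_string('bandit', 'mushroom', 'Bandit dataset to sample from.')
flags.DEFINE_string('layers', '50,50', 'Comma-separated hidden layer sizes.')
flags.DEFINE_float('lr', 1e-3, 'Learning rate for the fSVGD model.')
flags.DEFINE_float('mm_jitter', 1e-4, 'Jitter used by the fSVGD moment matching.')
flags.DEFINE_integer('interp_batch_size', 16, 'Interpolation batch size for fSVGD.')
flags.DEFINE_float('prior_variance', 0.1, 'Prior variance for the fSVGD model.')
FLAGS = flags.FLAGS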
Example #4
def run_iter():

    # Data type in {linear, sparse_linear, mushroom, financial, jester,
    #                 statlog, adult, covertype, census, wheel}
    data_type = FLAGS.dataset

    # Create dataset
    sampled_vals = sample_data(data_type, num_contexts)
    dataset, opt_rewards, opt_actions, num_actions, context_dim = sampled_vals

    # Define hyperparameters and algorithms
    hparams = tf.contrib.training.HParams(num_actions=num_actions)

    hparams_linear = tf.contrib.training.HParams(num_actions=num_actions,
                                                 context_dim=context_dim,
                                                 a0=6,
                                                 b0=6,
                                                 lambda_prior=0.25,
                                                 initial_pulls=2)

    hparams_rms = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              reset_lr=True,
                                              lr_decay_rate=0.5,
                                              training_freq=50,
                                              training_epochs=100,
                                              p=0.95,
                                              q=3)

    hparams_bbb = tf.contrib.training.HParams(num_actions=num_actions,
                                              context_dim=context_dim,
                                              init_scale=0.3,
                                              activation=tf.nn.relu,
                                              layer_sizes=[50],
                                              batch_size=512,
                                              activate_decay=True,
                                              initial_lr=0.1,
                                              max_grad_norm=5.0,
                                              show_training=False,
                                              freq_summary=1000,
                                              buffer_s=-1,
                                              initial_pulls=2,
                                              optimizer='RMS',
                                              use_sigma_exp_transform=True,
                                              cleared_times_trained=10,
                                              initial_training_steps=100,
                                              noise_sigma=0.1,
                                              reset_lr=False,
                                              training_freq=50,
                                              training_epochs=100)

    hparams_nlinear = tf.contrib.training.HParams(num_actions=num_actions,
                                                  context_dim=context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=1,
                                                  training_freq_network=50,
                                                  training_epochs=100,
                                                  a0=6,
                                                  b0=6,
                                                  lambda_prior=0.25)

    hparams_luga = tf.contrib.training.HParams(num_actions=num_actions,
                                               num_contexts=num_contexts,
                                               context_dim=context_dim,
                                               activation=tf.nn.relu,
                                               latent_dim=50,
                                               batch_size=512,
                                               initial_lr=2e-4,
                                               show_training=False,
                                               lr_decay=False,
                                               freq_summary=10000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               training_freq=20,
                                               training_epochs=40,
                                               lambda_prior=0.25,
                                               show_loss=False,
                                               kl=1.0,
                                               recon=1.0,
                                               psigma=1.0,
                                               glnoise=False)

    hparams_sivi1 = tf.contrib.training.HParams(num_actions=num_actions,
                                                num_contexts=num_contexts,
                                                context_dim=context_dim,
                                                activation=tf.nn.relu,
                                                latent_dim=50,
                                                batch_size=512,
                                                initial_lr=1e-3,
                                                show_training=False,
                                                verbose=False,
                                                lr_decay=False,
                                                freq_summary=10000,
                                                buffer_s=-1,
                                                initial_pulls=2,
                                                training_freq=20,
                                                training_epochs=40,
                                                lambda_prior=0.25,
                                                show_loss=False,
                                                kl=1.0,
                                                recon=1.0,
                                                two_decoder=False,
                                                glnoise=False,
                                                psigma=1.25)
    
    hparams_lusi_abl_km = tf.contrib.training.HParams(num_actions=num_actions,
                                                      num_contexts=num_contexts,
                                                      context_dim=context_dim,
                                                      activation=tf.nn.relu,
                                                      latent_dim=50,
                                                      batch_size=512,
                                                      initial_lr=1e-3,
                                                      show_training=False,
                                                      verbose=False,
                                                      lr_decay=False,
                                                      freq_summary=10000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      training_freq=20,
                                                      training_epochs=40,
                                                      lambda_prior=0.25,
                                                      show_loss=False,
                                                      km=1,
                                                      onez=0,
                                                      recon=1.0,
                                                      two_decoder=False,
                                                      glnoise=False,
                                                      psigma=1.25)

    hparams_luga_abl_km = tf.contrib.training.HParams(num_actions=num_actions,
                                                      num_contexts=num_contexts,
                                                      context_dim=context_dim,
                                                      activation=tf.nn.relu,
                                                      latent_dim=50,
                                                      batch_size=512,
                                                      initial_lr=2e-4,
                                                      show_training=False,
                                                      lr_decay=False,
                                                      freq_summary=10000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      training_freq=20,
                                                      training_epochs=40,
                                                      lambda_prior=0.25,
                                                      show_loss=False,
                                                      km=1,
                                                      onez=0,
                                                      recon=1.0,
                                                      psigma=1.0,
                                                      glnoise=False)


    algos = [
        UniformSampling('Uniform Sampling', hparams),  # 1
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),  # 2
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),  # 3
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),  # 4
        PiposteriorBNNSampling('DGF', hparams_bbb, 'DGF'),  # 5
        VariationalSampling_v4('LU_Gaussian', hparams_luga),  # 6
        # A smaller learning rate (e.g. 3e-4 or 1e-4) works better on the
        # 'mushroom' dataset for LU_SIVI and LU_Gaussian.
        VariationalSamplingSivi_dgf_v7("LU_SIVI", hparams_sivi1),  # 7
        # Ablation study variants:
        VariationalSampling_abl('LU_Gaussian_Ablation_multi_z_1m', hparams_luga_abl_km),
        VariationalSamplingSivi_dgf_abl("LU_SIVI_Ablation_multi_z_1m", hparams_lusi_abl_km),
    ]

    t_init = time.time()
    results = run_contextual_bandit(context_dim, num_actions, dataset, algos)
    _, h_rewards = results

    display_results(algos, opt_rewards, opt_actions, h_rewards, t_init, data_type)

    opt_rewards = opt_rewards.reshape([-1, 1])
    regret_i = opt_rewards - h_rewards

    return regret_i
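
run_iter returns the per-step regret matrix for one pass over the dataset; a minimal driver sketch (the repetition count and aggregation are assumptions, with np as numpy):

n_runs = 5  # assumed number of independent repetitions
regret_runs = np.stack([run_iter() for _ in range(n_runs)])  # (n_runs, num_contexts, num_algos)
mean_cum_regret = np.cumsum(regret_runs, axis=1).mean(axis=0)  # cumulative regret averaged over runs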
Example #5
# Grid search over "neural greedy" bandit configurations. This fragment
# assumes num_actions, context_dim, hparams and hparams_linear are defined
# as in the examples above; the grid values below are assumed placeholders.
layer_sizes = [[50], [50, 50]]
batch_sizes = [32, 512]
optimizers = ['RMS', 'SGD']
neural_greedy_protos = []

for layer_size in layer_sizes:
    for batch_size in batch_sizes:
        for optimizer in optimizers:
            print(batch_size, layer_size, optimizer,
                  'NG_bs%s_ls%ix50_%s' % (batch_size, len(layer_size), optimizer))
            # Binding the grid point as a default argument makes each lambda
            # capture its own parameters instead of the shared loop variables.
            neural_greedy_protos.append(lambda param=[batch_size, layer_size, optimizer]: PosteriorBNNSampling(
                'NG_bs%s_ls%ix50_%s' % (param[0], len(param[1]), param[2]),
                tf.contrib.training.HParams(num_actions=num_actions,
                                            context_dim=context_dim,
                                            init_scale=0.3,
                                            activation=tf.nn.relu,
                                            layer_sizes=param[1],
                                            batch_size=param[0],
                                            activate_decay=True,
                                            initial_lr=0.1,
                                            max_grad_norm=5.0,
                                            show_training=False,
                                            freq_summary=1000,
                                            buffer_s=-1,
                                            initial_pulls=2,
                                            optimizer=param[2],
                                            reset_lr=True,
                                            lr_decay_rate=0.5,
                                            training_freq=50,
                                            training_epochs=50),
                'RMSProp'))

print(len(neural_greedy_protos))

random_proto = lambda: UniformSampling('Uniform Sampling', hparams)
linThompson_proto = lambda: LinearFullPosteriorSampling('linThompson', hparams_linear)
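
Since each proto above is a zero-argument factory, fresh (untrained) algorithm instances can be created per experiment; a minimal usage sketch:

algos = [random_proto(), linThompson_proto()] + [p() for p in neural_greedy_protos]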
Example #6
class BanditTrackerTF(AbstractTracker):
    def __init__(self, ontology):
        super(BanditTrackerTF, self).__init__(ontology)
        self.bc = BertClient(check_version=False,
                             check_length=False,
                             ip="compute-0-1.local")

        self.num_actions = 2  # Possible actions: update state, do not update state

        self.context_dim = 2049  # Concatenation of all features

        # Define hyper parameters to use in Contextual Bandit Algorithm
        hparams_linear = tf.contrib.training.HParams(
            num_actions=self.num_actions,
            context_dim=self.context_dim,
            a0=6,
            b0=6,
            lambda_prior=0.25,
            initial_pulls=2)

        hparams_dropout = tf.contrib.training.HParams(
            num_actions=self.num_actions,
            context_dim=self.context_dim,
            init_scale=0.3,
            activation=tf.nn.relu,
            layer_sizes=[50],
            batch_size=512,
            activate_decay=True,
            initial_lr=0.1,
            max_grad_norm=5.0,
            show_training=False,
            freq_summary=1000,
            buffer_s=-1,
            initial_pulls=2,
            optimizer='RMS',
            reset_lr=True,
            lr_decay_rate=0.5,
            training_freq=50,
            training_epochs=100,
            use_dropout=True,
            keep_prob=0.80)

        self.food_dataset, self.food_opt_rewards, self.food_opt_actions, _, _ = self.get_dataset(
            pickle.load(
                open(
                    "/home/l.fischer/DSTC2_Baselines/training_data/train_data_food_v2",
                    "rb")))

        self.area_dataset, self.area_opt_rewards, self.area_opt_actions, _, _ = self.get_dataset(
            pickle.load(
                open(
                    "/home/l.fischer/DSTC2_Baselines/training_data/train_data_area_v2",
                    "rb")))

        self.price_dataset, self.price_opt_rewards, self.price_opt_actions, _, _ = self.get_dataset(
            pickle.load(
                open(
                    "/home/l.fischer/DSTC2_Baselines/training_data/train_data_pricerange_v2",
                    "rb")))

        self.food_algo = PosteriorBNNSampling('Dropout_food', hparams_dropout,
                                              'RMSProp')

        self.area_algo = PosteriorBNNSampling('Dropout_area', hparams_dropout,
                                              'RMSProp')

        self.price_algo = PosteriorBNNSampling('Dropout_price',
                                               hparams_dropout, 'RMSProp')

        self.train()

    def is_word_in_ontology(self, word, slot_type="food"):
        """
        Checks whether a given word is present in the ontology.
        :param word: The word to look up in the ontology
        :param slot_type: The slot type under which to look the word up
        :return: 1 if the word is present for that slot type, else 0
        """

        if slot_type == "food":
            return int(word in self.ontology["informable"]["food"])

        elif slot_type == "area":
            return int(word in self.ontology["informable"]["area"])

        else:
            return int(word in self.ontology["informable"]["pricerange"])

    def get_dataset(self, data_object):
        # convert to np_array
        data_object["features"] = normalize(np.array(data_object["features"]),
                                            norm="l1")
        data_object["labels"] = np.array(data_object["labels"])

        rewards = np.array([(0, 1) if label else (1, 0)
                            for label in data_object["labels"]])

        num_actions = 2  # Actions: update state, do not update state
        context_dim = 2049
        # noise_stds = [0.01 * (i + 1) for i in range(num_actions)]

        # Randomly drawn per-action weights; note that betas is unused below.
        betas = np.random.uniform(-1, 1, (context_dim, num_actions))
        betas /= np.linalg.norm(betas, axis=0)

        # rewards = np.random.randint(2, size=(10000, 2))
        opt_actions = np.argmax(rewards, axis=1)

        opt_rewards = np.array(
            [rewards[i, act] for i, act in enumerate(opt_actions)])
        return np.hstack(
            (data_object["features"],
             rewards)), opt_rewards, opt_actions, num_actions, context_dim

    def train(self):

        # Instantiate Contextual Bandit Object
        food_bandit = ContextualBandit(self.context_dim, self.num_actions)

        food_bandit.feed_data(self.food_dataset)

        # Training food bandit classifier

        print("Training food")
        for i in tqdm(range(self.food_dataset.shape[0])):
            context = food_bandit.context(i)
            action = self.food_algo.action(context)
            reward = food_bandit.reward(i, action)

            self.food_algo.update(context, action, reward)

        # Instantiate Contextual Bandit Object
        area_bandit = ContextualBandit(self.context_dim, self.num_actions)

        area_bandit.feed_data(self.area_dataset)

        # Training area bandit classifier

        print("Training area")
        for i in tqdm(range(self.area_dataset.shape[0])):
            context = area_bandit.context(i)
            action = self.area_algo.action(context)
            reward = area_bandit.reward(i, action)

            self.area_algo.update(context, action, reward)

        # Instantiate Contextual Bandit Object
        price_bandit = ContextualBandit(self.context_dim, self.num_actions)

        price_bandit.feed_data(self.price_dataset)

        # Training price bandit classifier

        print("Training price")
        for i in tqdm(range(self.price_dataset.shape[0])):
            context = price_bandit.context(i)
            action = self.price_algo.action(context)
            reward = price_bandit.reward(i, action)

            self.price_algo.update(context, action, reward)

        print("Training Complete")

    def addTurn(self, turn):
        """
        Adds a turn to this tracker
        :param turn: The turn to process and add
        :return: A hypothesis of the current state of the dialog
        """
        hyps = copy.deepcopy(self.hyps)

        goal_stats = defaultdict(lambda: defaultdict(float))

        # Obtaining the best hypothesis from the ASR module
        best_asr_hyp = turn['input']["live"]['asr-hyps'][0]["asr-hyp"]

        # English stopwords set with punctuation
        stop = stopwords.words('english') + list(string.punctuation)

        # Tokenize the best hypothesis with NLTK's word_tokenize
        tkns = word_tokenize(best_asr_hyp)

        # Remove stop words (bigram shingling is left commented out below)
        processed_hyp = [
            word for word in tkns if word not in stop
        ]  # + [tup[0] + " " + tup[1] for tup in ngrams(tkns, 2)]

        # Manually map "moderately"/"affordable" to "moderate" and "cheaper" to "cheap"
        for idx, word in enumerate(processed_hyp):
            if word == "moderately" or word == "affordable":
                processed_hyp[idx] = "moderate"
            if word == "cheaper":
                processed_hyp[idx] = "cheap"

        if processed_hyp:

            # Create an embedding of the user utterance using BERT
            sentence_embedding = np.array(self.bc.encode([best_asr_hyp]))[0]

            # Iterate through all the words in the user utterance to obtain the features needed
            for word in processed_hyp:

                # Create an embedding of the word being iterated, using BERT
                word_embedding = np.array(self.bc.encode([word]))[0]

                # Check whether the current word is present in the ontology, in one of the slot types
                word_in_food_ontology = [
                    self.is_word_in_ontology(word, slot_type="food")
                ]
                word_in_area_ontology = [
                    self.is_word_in_ontology(word, slot_type="area")
                ]
                word_in_price_ontology = [
                    self.is_word_in_ontology(word, slot_type="price")
                ]

                # Concatenate the features together (the result is a vector of size 2049)
                food_features = np.concatenate(
                    (word_embedding, sentence_embedding,
                     word_in_food_ontology))
                area_features = np.concatenate(
                    (word_embedding, sentence_embedding,
                     word_in_area_ontology))
                price_features = np.concatenate(
                    (word_embedding, sentence_embedding,
                     word_in_price_ontology))

                # Decide whether the current word should update one (or more) of the slot types
                update_food_slot = self.food_algo.action(food_features)
                update_area_slot = self.area_algo.action(area_features)
                update_price_slot = self.price_algo.action(price_features)

                if update_food_slot:
                    goal_stats["food"][word] += 1.0

                if update_area_slot:
                    goal_stats["area"][word] += 1.0

                if update_price_slot:
                    goal_stats["pricerange"][word] += 1.0

        # Pick the top values for each slot
        super(BanditTrackerTF, self).fill_goal_labels(goal_stats, hyps)
        super(BanditTrackerTF, self).fill_joint_goals(hyps)

        self.hyps = hyps
        return self.hyps
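For reference, each slot bandit scores a per-word feature vector built by the concatenation above. A minimal, self-contained sketch of the layout, assuming 1024-dim BERT encodings (the exact width depends on the BERT model behind self.bc):

import numpy as np

word_embedding = np.zeros(1024)      # stands in for self.bc.encode([word])[0]
sentence_embedding = np.zeros(1024)  # stands in for self.bc.encode([best_asr_hyp])[0]
word_in_ontology = [1]               # is_word_in_ontology(...) returns 0 or 1

features = np.concatenate((word_embedding, sentence_embedding, word_in_ontology))
assert features.shape == (2049,)     # matches context_dim in get_dataset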
Example #8
def main(_):
    # create dataset
    data_type = "job_bank"
    num_contexts = 2000
    num_actions = 2
    context_dim = 2
    # np.float / np.int were removed from NumPy; use explicit dtypes instead
    dataset = np.empty((num_contexts, 4), dtype=np.float64)
    opt_actions = np.empty(num_contexts, dtype=np.int64)
    opt_rewards = np.empty(num_contexts, dtype=np.float64)
    for i in range(num_contexts):  # "i", not "iter", to avoid shadowing the built-in
        ctx = context_bandit_gen_context()
        all_probs = [context_bandit_prob(ctx, a) for a in range(num_actions)]
        optimal = np.argmax(all_probs)
        rewards = [context_bandit_reward(ctx, a) for a in range(num_actions)]
        dataset[i, :] = np.array(ctx.tolist() + rewards)
        opt_actions[i] = optimal
        opt_rewards[i] = all_probs[optimal]

    hparams = HParams(num_actions=num_actions)

    hparams_linear = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             a0=6,
                             b0=6,
                             lambda_prior=0.25,
                             initial_pulls=2)

    hparams_rms = HParams(num_actions=num_actions,
                          context_dim=context_dim,
                          init_scale=0.3,
                          activation=tf.nn.relu,
                          layer_sizes=[50],
                          batch_size=512,
                          activate_decay=True,
                          initial_lr=0.1,
                          max_grad_norm=5.0,
                          show_training=False,
                          freq_summary=1000,
                          buffer_s=-1,
                          initial_pulls=2,
                          optimizer='RMS',
                          reset_lr=True,
                          lr_decay_rate=0.5,
                          training_freq=50,
                          training_epochs=100,
                          p=0.95,
                          q=3,
                          verbose=False)

    hparams_dropout = HParams(num_actions=num_actions,
                              context_dim=context_dim,
                              init_scale=0.3,
                              activation=tf.nn.relu,
                              layer_sizes=[50],
                              batch_size=512,
                              activate_decay=True,
                              initial_lr=0.1,
                              max_grad_norm=5.0,
                              show_training=False,
                              freq_summary=1000,
                              buffer_s=-1,
                              initial_pulls=2,
                              optimizer='RMS',
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=50,
                              training_epochs=100,
                              use_dropout=True,
                              keep_prob=0.80,
                              verbose=False)

    hparams_bbb = HParams(num_actions=num_actions,
                          context_dim=context_dim,
                          init_scale=0.3,
                          activation=tf.nn.relu,
                          layer_sizes=[50],
                          batch_size=512,
                          activate_decay=True,
                          initial_lr=0.1,
                          max_grad_norm=5.0,
                          show_training=False,
                          freq_summary=1000,
                          buffer_s=-1,
                          initial_pulls=2,
                          optimizer='RMS',
                          use_sigma_exp_transform=True,
                          cleared_times_trained=10,
                          initial_training_steps=100,
                          noise_sigma=0.1,
                          reset_lr=False,
                          training_freq=50,
                          training_epochs=100,
                          verbose=False)

    hparams_nlinear = HParams(num_actions=num_actions,
                              context_dim=context_dim,
                              init_scale=0.3,
                              activation=tf.nn.relu,
                              layer_sizes=[50],
                              batch_size=512,
                              activate_decay=True,
                              initial_lr=0.1,
                              max_grad_norm=5.0,
                              show_training=False,
                              freq_summary=1000,
                              buffer_s=-1,
                              initial_pulls=2,
                              reset_lr=True,
                              lr_decay_rate=0.5,
                              training_freq=1,
                              training_freq_network=50,
                              training_epochs=100,
                              a0=6,
                              b0=6,
                              lambda_prior=0.25,
                              verbose=False)

    hparams_nlinear2 = HParams(num_actions=num_actions,
                               context_dim=context_dim,
                               init_scale=0.3,
                               activation=tf.nn.relu,
                               layer_sizes=[50],
                               batch_size=512,
                               activate_decay=True,
                               initial_lr=0.1,
                               max_grad_norm=5.0,
                               show_training=False,
                               freq_summary=1000,
                               buffer_s=-1,
                               initial_pulls=2,
                               reset_lr=True,
                               lr_decay_rate=0.5,
                               training_freq=10,
                               training_freq_network=50,
                               training_epochs=100,
                               a0=6,
                               b0=6,
                               lambda_prior=0.25,
                               verbose=False)

    hparams_pnoise = HParams(num_actions=num_actions,
                             context_dim=context_dim,
                             init_scale=0.3,
                             activation=tf.nn.relu,
                             layer_sizes=[50],
                             batch_size=512,
                             activate_decay=True,
                             initial_lr=0.1,
                             max_grad_norm=5.0,
                             show_training=False,
                             freq_summary=1000,
                             buffer_s=-1,
                             initial_pulls=2,
                             optimizer='RMS',
                             reset_lr=True,
                             lr_decay_rate=0.5,
                             training_freq=50,
                             training_epochs=100,
                             noise_std=0.05,
                             eps=0.1,
                             d_samples=300,
                             verbose=False)

    hparams_alpha_div = HParams(num_actions=num_actions,
                                context_dim=context_dim,
                                init_scale=0.3,
                                activation=tf.nn.relu,
                                layer_sizes=[50],
                                batch_size=512,
                                activate_decay=True,
                                initial_lr=0.1,
                                max_grad_norm=5.0,
                                show_training=False,
                                freq_summary=1000,
                                buffer_s=-1,
                                initial_pulls=2,
                                optimizer='RMS',
                                use_sigma_exp_transform=True,
                                cleared_times_trained=10,
                                initial_training_steps=100,
                                noise_sigma=0.1,
                                reset_lr=False,
                                training_freq=50,
                                training_epochs=100,
                                alpha=1.0,
                                k=20,
                                prior_variance=0.1,
                                verbose=False)

    hparams_gp = HParams(num_actions=num_actions,
                         num_outputs=num_actions,
                         context_dim=context_dim,
                         reset_lr=False,
                         learn_embeddings=True,
                         max_num_points=1000,
                         show_training=False,
                         freq_summary=1000,
                         batch_size=512,
                         keep_fixed_after_max_obs=True,
                         training_freq=50,
                         initial_pulls=2,
                         training_epochs=100,
                         lr=0.01,
                         buffer_s=-1,
                         initial_lr=0.001,
                         lr_decay_rate=0.0,
                         optimizer='RMS',
                         task_latent_dim=5,
                         activate_decay=False,
                         verbose=False)

    algos = [
        UniformSampling('Uniform Sampling', hparams),
        FixedPolicySampling('Fixed 1', [0.75, 0.25], hparams),
        FixedPolicySampling('Fixed 2', [0.25, 0.75], hparams),
        PosteriorBNNSampling('RMS', hparams_rms, 'RMSProp'),
        PosteriorBNNSampling('Dropout', hparams_dropout, 'RMSProp'),
        PosteriorBNNSampling('BBB', hparams_bbb, 'Variational'),
        NeuralLinearPosteriorSampling('NeuralLinear', hparams_nlinear),
        NeuralLinearPosteriorSampling('NeuralLinear2', hparams_nlinear2),
        LinearFullPosteriorSampling('LinFullPost', hparams_linear),
        BootstrappedBNNSampling('BootRMS', hparams_rms),
        ParameterNoiseSampling('ParamNoise', hparams_pnoise),
        PosteriorBNNSampling('BBAlphaDiv', hparams_alpha_div, 'AlphaDiv'),
        PosteriorBNNSampling('MultitaskGP', hparams_gp, 'GP'),
    ]

    _, h_rewards, times = run_contextual_bandit(context_dim, num_actions,
                                                dataset, algos)

    display_results(algos, opt_rewards, opt_actions, h_rewards, times,
                    data_type)
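Example #8 relies on three helpers that are not shown on this page. The sketch below gives plausible, purely illustrative implementations; the real context_bandit_gen_context, context_bandit_prob, and context_bandit_reward may differ:

import numpy as np

def context_bandit_gen_context(context_dim=2):
    # Draw a random context vector (illustrative; the real generator may differ).
    return np.random.uniform(-1, 1, context_dim)

def context_bandit_prob(ctx, action):
    # Assumed Bernoulli success probability for (context, action):
    # an arbitrary logistic form, chosen only to make the sketch runnable.
    w = np.ones_like(ctx) if action == 1 else -np.ones_like(ctx)
    return 1.0 / (1.0 + np.exp(-np.dot(w, ctx)))

def context_bandit_reward(ctx, action):
    # Sample a binary reward from the assumed probability.
    return float(np.random.rand() < context_bandit_prob(ctx, action))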
Example #9
def main(argv):
    opts = get_options()
    print("Parameters: {}".format(opts))
    address = ('localhost', opts.ipc_port)  # family is deduced to be 'AF_INET'
    listener = Listener(address, authkey=b'bandit')
    conn = listener.accept()
    multiprocessing.current_process().authkey = b'bandit'
    print('connection accepted from', listener.last_accepted)


    # Create contextual bandit
    bandit = IPCBandit(conn)

    if opts.algorithm == "uniform":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions)
        policy = UniformSampling('Uniform Sampling', policy_parameters)

    elif opts.algorithm == "linear":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                     context_dim=bandit.context_dim,
                                                     a0=6,
                                                     b0=6,
                                                     lambda_prior=0.25,
                                                     initial_pulls=2)
        policy = LinearFullPosteriorSampling('LinFullPost', policy_parameters)

    elif opts.algorithm == "rms":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  p=0.95,
                                                  q=3)
        policy = PosteriorBNNSampling('RMS', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bootrms":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  reset_lr=True,
                                                  lr_decay_rate=0.5,
                                                  training_freq=50,
                                                  training_epochs=100,
                                                  p=0.95,
                                                  q=3)
        policy = BootstrappedBNNSampling('BootRMS', policy_parameters)

    elif opts.algorithm == "dropout":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                      context_dim=bandit.context_dim,
                                                      init_scale=0.3,
                                                      activation=tf.nn.relu,
                                                      layer_sizes=[50],
                                                      batch_size=512,
                                                      activate_decay=True,
                                                      initial_lr=0.1,
                                                      max_grad_norm=5.0,
                                                      show_training=False,
                                                      freq_summary=1000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      optimizer='RMS',
                                                      reset_lr=True,
                                                      lr_decay_rate=0.5,
                                                      training_freq=50,
                                                      training_epochs=100,
                                                      use_dropout=True,
                                                      keep_prob=0.80)
        policy = PosteriorBNNSampling('Dropout', policy_parameters, 'RMSProp')

    elif opts.algorithm == "bbb":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                  context_dim=bandit.context_dim,
                                                  init_scale=0.3,
                                                  activation=tf.nn.relu,
                                                  layer_sizes=[50],
                                                  batch_size=512,
                                                  activate_decay=True,
                                                  initial_lr=0.1,
                                                  max_grad_norm=5.0,
                                                  show_training=False,
                                                  freq_summary=1000,
                                                  buffer_s=-1,
                                                  initial_pulls=2,
                                                  optimizer='RMS',
                                                  use_sigma_exp_transform=True,
                                                  cleared_times_trained=10,
                                                  initial_training_steps=100,
                                                  noise_sigma=0.1,
                                                  reset_lr=False,
                                                  training_freq=50,
                                                  training_epochs=100)
        policy = PosteriorBNNSampling('BBB', policy_parameters, 'Variational')

    elif opts.algorithm == "neurallinear":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                      context_dim=bandit.context_dim,
                                                      init_scale=0.3,
                                                      activation=tf.nn.relu,
                                                      layer_sizes=[50],
                                                      batch_size=512,
                                                      activate_decay=True,
                                                      initial_lr=0.1,
                                                      max_grad_norm=5.0,
                                                      show_training=False,
                                                      freq_summary=1000,
                                                      buffer_s=-1,
                                                      initial_pulls=2,
                                                      reset_lr=True,
                                                      lr_decay_rate=0.5,
                                                      training_freq=1,
                                                      training_freq_network=50,
                                                      training_epochs=100,
                                                      a0=6,
                                                      b0=6,
                                                      lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear', policy_parameters)

    elif opts.algorithm == "neurallinear2":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                       context_dim=bandit.context_dim,
                                                       init_scale=0.3,
                                                       activation=tf.nn.relu,
                                                       layer_sizes=[50],
                                                       batch_size=512,
                                                       activate_decay=True,
                                                       initial_lr=0.1,
                                                       max_grad_norm=5.0,
                                                       show_training=False,
                                                       freq_summary=1000,
                                                       buffer_s=-1,
                                                       initial_pulls=2,
                                                       reset_lr=True,
                                                       lr_decay_rate=0.5,
                                                       training_freq=10,
                                                       training_freq_network=50,
                                                       training_epochs=100,
                                                       a0=6,
                                                       b0=6,
                                                       lambda_prior=0.25)
        policy = NeuralLinearPosteriorSampling('NeuralLinear2', policy_parameters)

    elif opts.algorithm == "pnoise":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                     context_dim=bandit.context_dim,
                                                     init_scale=0.3,
                                                     activation=tf.nn.relu,
                                                     layer_sizes=[50],
                                                     batch_size=512,
                                                     activate_decay=True,
                                                     initial_lr=0.1,
                                                     max_grad_norm=5.0,
                                                     show_training=False,
                                                     freq_summary=1000,
                                                     buffer_s=-1,
                                                     initial_pulls=2,
                                                     optimizer='RMS',
                                                     reset_lr=True,
                                                     lr_decay_rate=0.5,
                                                     training_freq=50,
                                                     training_epochs=100,
                                                     noise_std=0.05,
                                                     eps=0.1,
                                                     d_samples=300,
                                                     )
        policy = ParameterNoiseSampling('ParamNoise', policy_parameters)

    elif opts.algorithm == "alpha_div":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        init_scale=0.3,
                                                        activation=tf.nn.relu,
                                                        layer_sizes=[50],
                                                        batch_size=512,
                                                        activate_decay=True,
                                                        initial_lr=0.1,
                                                        max_grad_norm=5.0,
                                                        show_training=False,
                                                        freq_summary=1000,
                                                        buffer_s=-1,
                                                        initial_pulls=2,
                                                        optimizer='RMS',
                                                        use_sigma_exp_transform=True,
                                                        cleared_times_trained=10,
                                                        initial_training_steps=100,
                                                        noise_sigma=0.1,
                                                        reset_lr=False,
                                                        training_freq=50,
                                                        training_epochs=100,
                                                        alpha=1.0,
                                                        k=20,
                                                        prior_variance=0.1)
        policy = PosteriorBNNSampling('BBAlphaDiv', policy_parameters, 'AlphaDiv')

    elif opts.algorithm == "gp":
        policy_parameters = tf.contrib.training.HParams(num_actions=bandit.num_actions,
                                                        num_outputs=bandit.num_actions,
                                                        context_dim=bandit.context_dim,
                                                        reset_lr=False,
                                                        learn_embeddings=True,
                                                        max_num_points=1000,
                                                        show_training=False,
                                                        freq_summary=1000,
                                                        batch_size=512,
                                                        keep_fixed_after_max_obs=True,
                                                        training_freq=50,
                                                        initial_pulls=2,
                                                        training_epochs=100,
                                                        lr=0.01,
                                                        buffer_s=-1,
                                                        initial_lr=0.001,
                                                        lr_decay_rate=0.0,
                                                        optimizer='RMS',
                                                        task_latent_dim=5,
                                                        activate_decay=False)
        policy = PosteriorBNNSampling('MultitaskGP', policy_parameters, 'GP')

    else:
        raise ValueError("Misspecified bandit algorithm: {}".format(opts.algorithm))

    print(policy)
    # Run the contextual bandit process
    while True:
        context = bandit.context()
        if context is None:
            break
        action = policy.action(context)
        reward = bandit.pull(action)
        if reward is None:
            break

        policy.update(context, action, reward)

    conn.close()
    listener.close()
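The listener above waits for a client process on the other end of the pipe. Since IPCBandit is not shown here, the client sketch below assumes the simplest possible wire protocol (plain send/recv of Python objects, with None as the shutdown sentinel); the actual implementation may differ:

import numpy as np
from multiprocessing.connection import Client

def run_client(port, num_rounds=100, context_dim=2):
    conn = Client(('localhost', port), authkey=b'bandit')
    for _ in range(num_rounds):
        context = np.random.uniform(-1, 1, context_dim)
        conn.send(context)           # assumed: IPCBandit.context() recv()s this
        action = conn.recv()         # assumed: server replies with the chosen action
        reward = float(action == 0)  # illustrative environment logic only
        conn.send(reward)            # assumed: IPCBandit.pull() recv()s this
    conn.send(None)                  # assumed sentinel: the server loop breaks on None
    conn.close()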
Example #10
    reset_lr=True,
    lr_decay_rate=0.5,
    training_freq=50,
    training_epochs=100,
    noise_std=0.05,
    eps=0.1,
    d_samples=300,
    bootstrap=artificial_data_generator)

hparams_lineps = tf.contrib.training.HParams(num_actions=num_actions,
                                             context_dim=context_dim,
                                             lam=0.1,
                                             eps=0.05)

random_proto = lambda: UniformSampling('Uniform Sampling', hparams)
neural_greedy_proto = lambda: PosteriorBNNSampling('NeuralGreedy', hparams_rms,
                                                   'RMSProp')
neural_greedy_proto_bootstrapped = lambda: PosteriorBNNSampling(
    'NeuralGreedy_artificial_data', hparams_rms_bootstrapped, 'RMSProp')

bootstrap_proto = lambda: BootstrappedBNNSampling('BootRMS', hparams_rmsb)
bootstrap_proto_bootstrapped = lambda: BootstrappedBNNSampling(
    'BootRMS_artificial_data', hparams_rmsb_bootstrapped)

noise_proto = lambda: ParameterNoiseSampling('ParamNoise', hparams_pnoise)
noise_proto_bootstrapped = lambda: ParameterNoiseSampling(
    'ParamNoise_artificial_data', hparams_pnoise_bootstrapped)

dropout_proto = lambda: PosteriorBNNSampling('Dropout', hparams_dropout,
                                             'RMSProp')
dropout_proto_bootstrapped = lambda: PosteriorBNNSampling(
    'Dropout_artificial_data', hparams_dropout_bootstrapped, 'RMSProp')
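
These zero-argument factories ("protos") exist so a benchmark can rebuild fresh, independently initialized policies for every simulation trial. A generic sketch of that pattern (the trial runner below is an assumption, not code from this page):

def run_trials(protos, run_one_trial, num_trials=5):
    # Re-instantiate every policy from its factory at the start of each trial,
    # so no TensorFlow state leaks between trials.
    results = []
    for _ in range(num_trials):
        algos = [proto() for proto in protos]
        results.append(run_one_trial(algos))
    return results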