def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    with gfile.GFile(filename, 'rb') as f:  # binary mode: np.load expects bytes
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']
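        # `dataset` stores one row per context: the context features followed
        # by the sampled reward for each arm; `opt_rewards` holds the
        # corresponding optimal rewards used to compute regret.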

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name[:3] in ('snp', 'anp'):
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            local_latent_net_sizes = [hidden_size] * 3 + [2]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = False

            # algo_name encodes the model variant, e.g. 'snp_prior_gp':
            # config[1] is the latent treatment and config[2] the uncertainty type.
            config = algo_name.split('_')
            mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
            if algo_name[:3] == 'anp':
                mfile = 'anp_' + mfile
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
                is_anp = True
            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=mpath,
                is_anp=is_anp)

            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            algos.append(
                offline_contextual_bandits.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
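
# A minimal usage sketch (an illustration, not part of the original listing):
# run several trials and report per-algorithm cumulative regret against the
# optimal rewards. The delta values and trial count here are hypothetical.
if __name__ == '__main__':
    algo_names = ['uniform', 'neurolinear']
    for delta in [0.5, 0.95]:
        for trial_idx in range(5):
            h_rewards, elapsed, opt_rewards = run_trial(trial_idx, delta, algo_names)
            # h_rewards: (num_contexts, num_algos); broadcast the optimal
            # rewards across algorithms to get per-step regret.
            regret = np.sum(opt_rewards[:, None] - h_rewards, axis=0)
            print('delta=%.2f trial=%d regret=%s time=%.1fs'
                  % (delta, trial_idx, regret, elapsed))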
Example 2
def run_trial(trial_idx, delta, algo_names):
  """Runs a trial of wheel bandit problem instance for a set of algorithms."""

  all_algo_names = '_'.join(algo_names)
  runfile = str(delta) + '_' + str(trial_idx) + '_' + all_algo_names + '.pkl'
  savefile = os.path.join(FLAGS.savedir, runfile)
  if gfile.Exists(savefile):
    print('Results file exists; loading saved rewards instead of rerunning.')
    with gfile.Open(savefile, 'rb') as infile:
      saved_state = pickle.load(infile, encoding='latin-1')
    return saved_state['h_rewards'], saved_state['time']

  filename = os.path.join(
      FLAGS.datasetdir,
      str(delta) + '_' + str(trial_idx) + '.npz')
  with gfile.GFile(filename, 'rb') as f:  # binary mode: np.load expects bytes
    sampled_vals = np.load(f)
    dataset = sampled_vals['dataset']

  x_hidden_size = 100
  x_encoder_sizes = [x_hidden_size]*2

  algos = []
  ckptfile = None
  save_once = False
  for algo_name in algo_names:
    if algo_name == 'uniform':
      hparams = contrib_training.HParams(num_actions=num_actions)
      algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
    elif algo_name == 'neurolinear':
      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          layer_sizes=x_encoder_sizes,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=1,
          training_freq_network=20,
          training_epochs=50,
          a0=12,
          b0=30,
          lambda_prior=23)
      algos.append(neural_linear_sampling.NeuralLinearPosteriorSampling(
          algo_name, hparams))
    elif algo_name == 'multitaskgp':
      hparams_gp = contrib_training.HParams(
          num_actions=num_actions,
          num_outputs=num_actions,
          context_dim=context_dim,
          reset_lr=False,
          learn_embeddings=True,
          max_num_points=1000,
          show_training=False,
          freq_summary=1000,
          batch_size=512,
          keep_fixed_after_max_obs=True,
          training_freq=20,
          initial_pulls=2,
          training_epochs=50,
          lr=0.01,
          buffer_s=-1,
          initial_lr=0.001,
          lr_decay_rate=0.0,
          optimizer='RMS',
          task_latent_dim=5,
          activate_decay=False)
      algos.append(posterior_bnn_sampling.PosteriorBNNSampling(
          algo_name, hparams_gp, 'GP'))
    elif algo_name[:3] in ('snp', 'anp'):
      hidden_size = 64
      latent_units = 32
      global_latent_net_sizes = [hidden_size]*2 + [2*latent_units]
      if algo_name[:3] == 'snp':
        local_latent_net_sizes = [hidden_size]*3 + [2]
      else:
        local_latent_net_sizes = [hidden_size]*3 + [2*5]
      x_y_encoder_sizes = [hidden_size]*3
      heteroskedastic_net_sizes = None
      mean_att_type = attention.laplace_attention
      scale_att_type_1 = attention.laplace_attention
      scale_att_type_2 = attention.laplace_attention
      att_type = 'multihead'
      att_heads = 8
      data_uncertainty = False
      is_anp = algo_name[:3] == 'anp'

      hparams = contrib_training.HParams(
          num_actions=num_actions,
          context_dim=context_dim,
          init_scale=0.3,
          activation=tf.nn.relu,
          output_activation=tf.nn.relu,
          x_encoder_sizes=x_encoder_sizes,
          x_y_encoder_sizes=x_y_encoder_sizes,
          global_latent_net_sizes=global_latent_net_sizes,
          local_latent_net_sizes=local_latent_net_sizes,
          heteroskedastic_net_sizes=heteroskedastic_net_sizes,
          att_type=att_type,
          att_heads=att_heads,
          mean_att_type=mean_att_type,
          scale_att_type_1=scale_att_type_1,
          scale_att_type_2=scale_att_type_2,
          data_uncertainty=data_uncertainty,
          batch_size=512,
          activate_decay=True,
          initial_lr=0.1,
          max_grad_norm=5.0,
          show_training=False,
          freq_summary=1000,
          buffer_s=-1,
          initial_pulls=2,
          reset_lr=True,
          lr_decay_rate=0.5,
          training_freq=10,
          training_freq_network=20,
          training_epochs=50,
          uncertainty_type='attentive_freeform',
          local_variational=True,
          model_path=None,
          is_anp=is_anp)

      # algo_name encodes the model variant, e.g. 'snp_prior_gp_offline':
      # config[1] is the latent treatment, config[2] the uncertainty type,
      # and config[3] the training regime (online/warmstart/offline).
      config = algo_name.split('_')
      if config[1] == 'prior':
        hparams.set_hparam('local_variational', False)

      if config[2] == 'gp':
        hparams.set_hparam('uncertainty_type', 'attentive_gp')

      if config[3] in ('warmstart', 'offline'):
        mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
        if algo_name[:3] == 'anp':
          mfile = 'anp_' + mfile
        mpath = os.path.join(FLAGS.modeldir, mfile)
        hparams.set_hparam('model_path', mpath)

      if config[3] in ('online', 'warmstart'):
        algos.append(online_contextual_bandits.OnlineContextualBandits(
            algo_name, hparams))
      else:
        algos.append(offline_contextual_bandits.OfflineContextualBandits(
            algo_name, hparams))
        ckptfile = os.path.join(FLAGS.ckptdir, runfile)
        if gfile.Exists(ckptfile):
          save_once = True

  t_init = time.time()
  print('Starting contextual bandit run')
  _, h_rewards = run_contextual_bandit(
      dataset,
      algos,
      save_once=save_once,
      pkl_file=ckptfile)
  t_final = time.time()

  savedict = {'h_rewards': h_rewards, 'time': t_final - t_init}
  with gfile.Open(savefile, 'wb') as outfile:
    pickle.dump(savedict, outfile)
  return h_rewards, t_final - t_init
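
# Hedged usage sketch (illustrative only): because results are pickled to
# FLAGS.savedir, a repeated call with the same arguments loads the saved
# rewards rather than rerunning the trial. The algorithm name is hypothetical.
if __name__ == '__main__':
  h_rewards, elapsed = run_trial(0, 0.95, ['snp_prior_gp_offline'])
  h_rewards_again, _ = run_trial(0, 0.95, ['snp_prior_gp_offline'])  # cache hit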