def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    # The .npz archive must be opened in binary mode for np.load.
    with gfile.GFile(filename, 'rb') as f:
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name.startswith(('snp', 'anp')):
            hidden_size = 64
            latent_units = 32
            global_latent_net_sizes = [hidden_size] * 2 + [2 * latent_units]
            local_latent_net_sizes = [hidden_size] * 3 + [2]
            x_y_encoder_sizes = [hidden_size] * 3
            heteroskedastic_net_sizes = None
            mean_att_type = attention.laplace_attention
            scale_att_type_1 = attention.laplace_attention
            scale_att_type_2 = attention.laplace_attention
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            is_anp = False

            # algo_name encodes the configuration as '<family>_<cfg1>_<cfg2>';
            # the pieces select the checkpoint file and the uncertainty
            # settings applied below.
            config = algo_name.split('_')
            mfile = FLAGS.prefix + config[1] + '_' + config[2] + FLAGS.suffix
            if algo_name.startswith('anp'):
                mfile = 'anp_' + mfile
                local_latent_net_sizes = [hidden_size] * 3 + [2 * 5]
                is_anp = True
            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_sizes=x_encoder_sizes,
                x_y_encoder_sizes=x_y_encoder_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                mean_att_type=mean_att_type,
                scale_att_type_1=scale_att_type_1,
                scale_att_type_2=scale_att_type_2,
                data_uncertainty=data_uncertainty,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50,
                uncertainty_type='attentive_freeform',
                local_variational=True,
                model_path=mpath,
                is_anp=is_anp)

            if config[1] == 'prior':
                hparams.set_hparam('local_variational', False)

            if config[2] == 'gp':
                hparams.set_hparam('uncertainty_type', 'attentive_gp')

            algos.append(
                offline_contextual_bandits.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)  # pytype: disable=wrong-keyword-args
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
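run_trial returns the rewards each algorithm collected, the wall-clock time of
the trial, and the optimal rewards for the sampled contexts. Below is a minimal
sketch (not part of the original module) of how those outputs could be reduced
to a cumulative-regret number per algorithm, assuming h_rewards has one column
per entry of algo_names; the summarize_trial helper and the argument values are
hypothetical.

import numpy as np

def summarize_trial(algo_names, h_rewards, opt_rewards):
    """Hypothetical helper: cumulative regret per algorithm for one trial."""
    opt = np.asarray(opt_rewards).reshape(-1, 1)               # (num_contexts, 1)
    cum_regret = np.sum(opt - np.asarray(h_rewards), axis=0)   # one total per algorithm
    return dict(zip(algo_names, cum_regret))

# Hypothetical usage:
# h_rewards, elapsed, opt_rewards = run_trial(0, 0.5, ['uniform', 'neurolinear'])
# print(summarize_trial(['uniform', 'neurolinear'], h_rewards, opt_rewards))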
Example #2
def run_trial(trial_idx, delta, algo_names):
    """Runs a trial of wheel bandit problem instance for a set of algorithms."""

    filename = os.path.join(FLAGS.datasetdir,
                            str(delta) + '_' + str(trial_idx) + '.npz')
    # The .npz archive must be opened in binary mode for np.load.
    with gfile.GFile(filename, 'rb') as f:
        sampled_vals = np.load(f)
        dataset = sampled_vals['dataset']
        opt_rewards = sampled_vals['opt_rewards']

    x_hidden_size = 100
    x_encoder_sizes = [x_hidden_size] * 2

    algos = []
    for algo_name in algo_names:
        if algo_name == 'uniform':
            hparams = contrib_training.HParams(num_actions=num_actions)
            algos.append(uniform_sampling.UniformSampling(algo_name, hparams))
        elif algo_name == 'neurolinear':
            hparams = contrib_training.HParams(num_actions=num_actions,
                                               context_dim=context_dim,
                                               init_scale=0.3,
                                               activation=tf.nn.relu,
                                               output_activation=tf.nn.relu,
                                               layer_sizes=x_encoder_sizes,
                                               batch_size=512,
                                               activate_decay=True,
                                               initial_lr=0.1,
                                               max_grad_norm=5.0,
                                               show_training=False,
                                               freq_summary=1000,
                                               buffer_s=-1,
                                               initial_pulls=2,
                                               reset_lr=True,
                                               lr_decay_rate=0.5,
                                               training_freq=1,
                                               training_freq_network=20,
                                               training_epochs=50,
                                               a0=12,
                                               b0=30,
                                               lambda_prior=23)
            algos.append(
                neural_linear_sampling.NeuralLinearPosteriorSampling(
                    algo_name, hparams))
        elif algo_name == 'multitaskgp':
            hparams_gp = contrib_training.HParams(
                num_actions=num_actions,
                num_outputs=num_actions,
                context_dim=context_dim,
                reset_lr=False,
                learn_embeddings=True,
                max_num_points=1000,
                show_training=False,
                freq_summary=1000,
                batch_size=512,
                keep_fixed_after_max_obs=True,
                training_freq=20,
                initial_pulls=2,
                training_epochs=50,
                lr=0.01,
                buffer_s=-1,
                initial_lr=0.001,
                lr_decay_rate=0.0,
                optimizer='RMS',
                task_latent_dim=5,
                activate_decay=False)
            algos.append(
                posterior_bnn_sampling.PosteriorBNNSampling(
                    algo_name, hparams_gp, 'GP'))
        elif algo_name.startswith('gnp'):
            hidden_size = 64
            x_encoder_net_sizes = None
            decoder_net_sizes = [hidden_size] * 3 + [2 * num_actions]
            heteroskedastic_net_sizes = None
            att_type = 'multihead'
            att_heads = 8
            data_uncertainty = False
            # algo_name encodes the model type (and, for the 'gnp_anp_beta_'
            # variants, the beta and temperature values) after the 'gnp_' prefix.
            config = algo_name.split('_')
            model_type = config[1]
            if algo_name.startswith('gnp_anp_beta_'):
                mfile = algo_name + FLAGS.suffix
                x_y_encoder_net_sizes = [hidden_size] * 3
                global_latent_net_sizes = [hidden_size] * 2
                local_latent_net_sizes = None
                beta = float(config[3])
                temperature = float(config[5])
            else:
                mfile = FLAGS.prefix + config[1] + FLAGS.suffix
                if model_type == 'cnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'np':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'anp':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = None
                elif model_type == 'acnp':
                    x_y_encoder_net_sizes = [hidden_size] * 4
                    global_latent_net_sizes = None
                    local_latent_net_sizes = None
                elif model_type == 'acns':
                    x_y_encoder_net_sizes = [hidden_size] * 2
                    global_latent_net_sizes = [hidden_size] * 2
                    local_latent_net_sizes = [hidden_size] * 2

                beta = 1.
                temperature = 1.

            mpath = os.path.join(FLAGS.modeldir, mfile)

            hparams = contrib_training.HParams(
                num_actions=num_actions,
                context_dim=context_dim,
                init_scale=0.3,
                activation=tf.nn.relu,
                output_activation=tf.nn.relu,
                x_encoder_net_sizes=x_encoder_net_sizes,
                x_y_encoder_net_sizes=x_y_encoder_net_sizes,
                global_latent_net_sizes=global_latent_net_sizes,
                local_latent_net_sizes=local_latent_net_sizes,
                decoder_net_sizes=decoder_net_sizes,
                heteroskedastic_net_sizes=heteroskedastic_net_sizes,
                att_type=att_type,
                att_heads=att_heads,
                model_type=model_type,
                data_uncertainty=data_uncertainty,
                beta=beta,
                temperature=temperature,
                model_path=mpath,
                batch_size=512,
                activate_decay=True,
                initial_lr=0.1,
                max_grad_norm=5.0,
                show_training=False,
                freq_summary=1000,
                buffer_s=-1,
                initial_pulls=2,
                reset_lr=True,
                lr_decay_rate=0.5,
                training_freq=10,
                training_freq_network=20,
                training_epochs=50)

            algos.append(
                offline_contextual_bandits_gnp.OfflineContextualBandits(
                    algo_name, hparams))

    t_init = time.time()
    _, h_rewards = contextual_bandit.run_contextual_bandit(
        context_dim,
        num_actions,
        dataset,
        algos,
        num_contexts=FLAGS.num_contexts)
    t_final = time.time()

    return h_rewards, t_final - t_init, opt_rewards[:FLAGS.num_contexts]
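In the 'gnp' branch above, beta and temperature are read from fixed positions
of the underscore-split algorithm name. The snippet below illustrates that
parsing; the concrete name is hypothetical, and only the field positions
(config[3] and config[5]) are taken from the code.

name = 'gnp_anp_beta_1.0_temperature_0.5'  # hypothetical algorithm name
config = name.split('_')
beta = float(config[3])         # -> 1.0
temperature = float(config[5])  # -> 0.5
# The branch then builds the checkpoint filename as name + FLAGS.suffix.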