def grid_search_parameter(
    parameter_name,
    parameter_values,
    agent_name,
    agent_pytorch_mlr_args,
    env_args,
    extra_env_args,
    train_samples=5000,
    valid_samples=10000,
    valid_runs=5,
):
    """Grid-search one agent parameter with an A/B test and return the best value.

    One ``PyTorchMLRAgent`` variant is registered per distinct candidate
    value, all variants are evaluated together through
    ``gather_agent_stats``, the resulting table is printed and written to a
    CSV file in the current working directory, and the candidate whose
    ``'AgentStats.Q0_500'`` entry is largest is returned.

    Besides its arguments, this function reads the module-level names
    ``env`` and ``logging_epsilon``.

    Args:
        parameter_name: Key of the agent argument that is being varied.
        parameter_values: Iterable of candidate values. Any iterable is
            accepted (it is materialised once); repeated values are
            evaluated only once.
        agent_name: Base display name; also used to build the CSV file name.
        agent_pytorch_mlr_args: Base agent arguments; ``parameter_name`` is
            overridden per variant.
        env_args: Environment arguments forwarded to ``gather_agent_stats``.
        extra_env_args: Extra environment arguments; a shallow copy is
            forwarded.
        train_samples: Forwarded to ``gather_agent_stats`` as a one-element
            list.
        valid_samples: Forwarded to ``gather_agent_stats``.
        valid_runs: Forwarded to ``gather_agent_stats``.

    Returns:
        The element of ``parameter_values`` with the highest
        ``'AgentStats.Q0_500'`` value (first one on ties).

    Raises:
        ValueError: If ``parameter_values`` is empty.
    """
    print(f'\tGrid search for {agent_name}')

    # Keep an explicit, order-preserving list of the candidates that were
    # actually registered. Repeated values yield the same variant name and
    # would (presumably) collapse into a single entry of the agent-init
    # mapping, which would shift the positional lookup of the optimum below.
    candidate_values = []
    seen_variant_names = set()
    grid_search_agent_inits = {}
    for parameter_value in parameter_values:
        variant_name = f'{agent_name} ({parameter_name} = {parameter_value})'
        if variant_name in seen_variant_names:
            continue
        seen_variant_names.add(variant_name)
        candidate_values.append(parameter_value)
        grid_search_agent_inits.update(
            build_agent_init(
                variant_name, PyTorchMLRAgent, {
                    **agent_pytorch_mlr_args, parameter_name: parameter_value
                }))

    if not candidate_values:
        # Fail before the expensive A/B test rather than inside pd.concat.
        raise ValueError(
            f'No candidate values given for parameter {parameter_name!r}')

    # Run A/B test with model variants
    agent_stats = gather_agent_stats(env, env_args, {**extra_env_args},
                                     grid_search_agent_inits, [train_samples],
                                     valid_samples, valid_runs,
                                     StatEpochsNewRandomSeed)

    # Flatten the nested result into one DataFrame: the first entry of
    # agent_stats supplies the 'Samples' column, the second maps each agent
    # to its statistics.
    stat_keys = list(agent_stats)
    samples = agent_stats[stat_keys[0]]
    per_agent_stats = agent_stats[stat_keys[1]]
    frames = []
    for alg, alg_stats in per_agent_stats.items():
        frame = pd.DataFrame(alg_stats)
        frame['Alg'] = alg
        frame['Samples'] = samples
        frames.append(frame)
    agent_stats_df = pd.concat(frames)
    agent_stats_df.columns = [str(c) for c in agent_stats_df.columns]

    # NOTE(review): the positional lookup assumes one result row per
    # registered variant, in registration order - confirm against
    # gather_agent_stats.
    values = agent_stats_df['AgentStats.Q0_500'].values
    optimal_parameter_value = candidate_values[np.argmax(values)]

    print(agent_stats_df)
    agent_stats_df.to_csv(
        f'GS_{agent_name.replace(" ", "_")}_pop_eps{logging_epsilon}.csv',
        index=False)
    print(f'\t... optimal {parameter_name} = {optimal_parameter_value}!')
    return optimal_parameter_value
        'll_IPS': False,
    }
    optimal_ll_strength_withlog = grid_search_parameter(
        'alpha',
        alphas_dual_bandit_log,
        'Dual Bandit log',
        dual_bandit_log_mlr_args,
        std_env_args,
        std_extra_env_args,
    )

    # Initialisation of different agents
    logging_agent = build_agent_init(
        'Logging', OrganicUserEventCounterAgent, {
            **organic_user_count_args,
            **std_env_args, 'select_randomly': True,
            'reverse_pop': False,
            'epsilon': 0
        })

    skyline_agent = build_agent_init('Skyline', SkylineAgent, {
        **skyline_args,
    })

    likelihood_agent = build_agent_init('Likelihood', PyTorchMLRAgent, {
        **pytorch_mlr_args, 'll_IPS': False,
        'alpha': 1.0
    })

    ips_likelihood_agent = build_agent_init(
        'IPS Likelihood', PyTorchMLRAgent, {
        **pytorch_mlr_args,
        'logIPS': True,
        'll_IPS': False,
    }
    optimal_ll_strength_withlog = grid_search_parameter(
        'alpha',
        alphas_dual_bandit_log,
        'Dual Bandit log',
        dual_bandit_log_mlr_args,
        std_env_args,
        std_extra_env_args,
    )

    # Initialisation of different agents
    logging_agent = build_agent_init('Logging', RandomAgent, {
        **random_args,
        **std_env_args,
    })

    skyline_agent = build_agent_init('Skyline', SkylineAgent, {
        **skyline_args,
    })

    likelihood_agent = build_agent_init('Likelihood', PyTorchMLRAgent, {
        **pytorch_mlr_args, 'll_IPS': False,
        'alpha': 1.0
    })

    ips_likelihood_agent = build_agent_init(
        'IPS Likelihood', PyTorchMLRAgent, {
            **pytorch_mlr_args, 'll_IPS': True,
            'alpha': 1.0