示例#1
0
def pp_launch(r, kwargs, distributions, params, initial_counts, testing_params, measure_list, max_time,
              thresholds_roc, store_mob, store_measure_bernoullis):
    """Run one mobility + epidemic rollout and collect its summary as a dict.

    `r` identifies the random repeat (unused here; kept for the parallel
    launcher's calling convention). The returned dict is detached from the
    simulator via a deep copy of the measure list so it can be shipped back
    from worker processes safely.
    """
    mobility = MobilitySimulator(**kwargs)
    mobility.simulate(max_time=max_time)

    epidemic = DiseaseModel(mobility, distributions)
    epidemic.launch_epidemic(
        params=params,
        initial_counts=initial_counts,
        testing_params=testing_params,
        measure_list=measure_list,
        thresholds_roc=thresholds_roc,
        verbose=False)

    summary = dict(
        state=epidemic.state,
        state_started_at=epidemic.state_started_at,
        state_ended_at=epidemic.state_ended_at,
        measure_list=copy.deepcopy(epidemic.measure_list),
        people_age=epidemic.mob.people_age,
        children_count_iasy=epidemic.children_count_iasy,
        children_count_ipre=epidemic.children_count_ipre,
        children_count_isym=epidemic.children_count_isym,
        tracing_stats=epidemic.tracing_stats,
    )
    if store_mob:
        summary['mob'] = epidemic.mob

    if not store_measure_bernoullis:
        # presumably clears the sampled bernoulli state held by the measures
        # when the caller did not ask to keep it — TODO confirm exit_run semantics
        summary['measure_list'].exit_run()

    return summary
示例#2
0
def pp_launch(r, kwargs, distributions, params, initial_counts, testing_params,
              measure_list, max_time):
    """Simulate mobility traces, run the epidemic model on top, and return a
    summary dict of the resulting per-person state information.

    `r` identifies the random repeat (unused here; kept for the parallel
    launcher's calling convention).
    """
    mobility = MobilitySimulator(**kwargs)
    mobility.simulate(max_time=max_time)

    epidemic = DiseaseModel(mobility, distributions)
    epidemic.launch_epidemic(params=params,
                             initial_counts=initial_counts,
                             testing_params=testing_params,
                             measure_list=measure_list,
                             verbose=False)

    # deep-copy the measure list so the summary is detached from simulator state
    summary = dict(
        state=epidemic.state,
        state_started_at=epidemic.state_started_at,
        state_ended_at=epidemic.state_ended_at,
        measure_list=copy.deepcopy(epidemic.measure_list),
        people_age=epidemic.mob.people_age,
        children_count_iasy=epidemic.children_count_iasy,
        children_count_ipre=epidemic.children_count_ipre,
        children_count_isym=epidemic.children_count_isym,
    )
    # STORE_MOB is a module-level flag; attach the (large) mobility object only on demand
    if STORE_MOB:
        summary['mob'] = epidemic.mob

    return summary
def get_calibrated_params_limited_iters(country, area, multi_beta_calibration,
                                        maxiters):
    """
    Returns calibrated parameters using only the first `maxiters` iterations of BO.

    Arguments:
        country : key into the calibration settings dictionaries
        area : area key within `country`
        multi_beta_calibration : whether betas were calibrated per site type
        maxiters : number of initial BO iterations to consider

    Returns:
        dict of (unnormalized) simulation parameters corresponding to the best
        observed objective within the first `maxiters` BO iterations
    """

    state = load_state(calibration_states[country][area])
    # restrict to the first `maxiters` observations; slicing clips safely
    # when fewer iterations are available
    train_G = state['train_G'][:maxiters]
    train_theta = state['train_theta']

    # load mobility settings; only the kwargs dict is needed (for downsampling),
    # so no MobilitySimulator is instantiated here
    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(
        country=country,
        area=area,
        datatype='new',
        start_date_string=data_start_date,
        end_date_string=data_end_date)
    assert len(unscaled_area_cases.shape) == 2

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)
    n_days = sim_cases.shape[0]

    # calibration target: daily cases aggregated over age groups
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        # negative squared error normalized by days (BO maximizes the objective)
        return -(G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    train_G_objectives = objective(train_G)
    best_observed_idx = train_G_objectives.argmax()

    param_bounds = (calibration_model_param_bounds_multi
                    if multi_beta_calibration else
                    calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(pdict=param_bounds,
                               multi_beta_calibration=multi_beta_calibration).T

    # map the best normalized (unit-cube) parameters back to simulation scale
    normalized_calibrated_params = train_theta[best_observed_idx]
    calibrated_params = transforms.unnormalize(normalized_calibrated_params,
                                               sim_bounds)
    return parr_to_pdict(parr=calibrated_params,
                         multi_beta_calibration=multi_beta_calibration)
示例#4
0
def make_bayes_opt_functions(args): 
    '''
    Generates and returns functions used to run Bayesian optimization
    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file

    '''
    header = []

    # depending on mode, set parameter bounds 
    if args.measures_optimized:
        param_bounds = settings_measures_param_bounds
    else:
        param_bounds = settings_model_param_bounds

    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    mob_settings = args.mob
    data_area = args.area
    data_country = args.country

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        kwargs = pickle.load(fp)
    mob = MobilitySimulator(**kwargs)
    
    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start
    data_end_date = args.end
    debug_simulation_days = args.endsimat

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    dynamic_tracing = not args.no_dynamic_tracing
    load_observations = args.load

    # set testing parameters
    # NOTE: this dict is mutated below (tests_per_batch, testing_t_window)
    testing_params = settings_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """


    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    new_cases_ = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                      start_date_string=data_start_date, end_date_string=data_end_date)
    assert(len(new_cases_.shape) == 2)

    if new_cases_[0].sum() == 0:
        print('No positive cases at provided start time; cannot seed simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    # Scale down cases based on number of people in town, region, and downsampling
    new_cases = np.ceil(
        (new_cases_ * mob.num_people_unscaled) /
        (mob.downsample * mob.region_population))
    num_age_groups = new_cases.shape[1]
    header.append('Downsampling : ' + str(mob.downsample))
    header.append('Town population: ' + str(mob.num_people))
    header.append('Town population (unscaled): ' + str(mob.num_people_unscaled))
    header.append('Region population : ' + str(mob.region_population))

    # Set test capacity per day as (a) command line; or (b) maximum daily positive case increase over observed period
    # NOTE(review): branch (a) yields a float fraction (cap / population) while
    # branch (b) yields an int case count — confirm downstream handles both
    if args.testingcap:
        testing_params['tests_per_batch'] = (args.testingcap / mob.num_people_unscaled)
    else:
        daily_increase = new_cases.sum(axis=1)[1:] - new_cases.sum(axis=1)[:-1]
        testing_params['tests_per_batch'] = int(daily_increase.max())

    # test reporting lag must be a whole number of days (hours divisible by 24)
    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert(int(testing_params['test_reporting_lag']) % 24 == 0)

    # generate initial seeds based on case numbers
    initial_seeds = gen_initial_seeds(new_cases)
    header.append('Initial seed counts : ' + str(initial_seeds))

    # in debug mode, shorten time of simulation, shorten time
    if debug_simulation_days:
        new_cases = new_cases[:debug_simulation_days]

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(new_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, dynamic_tracing=True)

    header.append(
        'Daily test capacity in sim.: ' + str(testing_params['tests_per_batch']))
    header.append(
        'Max time T (days): ' + str(new_cases.shape[0]))
    header.append(
        'Target cases per age group at t=0:   ' + str(list(map(int, new_cases[0].tolist()))))
    header.append(
        'Target cases per age group at t=T:   ' + str(list(map(int, new_cases[-1].tolist()))))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = new_cases.shape
    G_obs = torch.tensor(new_cases).reshape(n_days * n_age)  # flattened

    sim_bounds = pdict_to_parr(param_bounds, measures_optimized=args.measures_optimized).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: ' + str(parr_to_pdict(sim_bounds.T, measures_optimized=args.measures_optimized)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(args.start)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        settings_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'             ends at : {sim_end_date}')
    header.append(f'Lockdown   starts at : {lockdown_start_date}')
    header.append(f'             ends at : {lockdown_end_date}')
    
    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        dynamic_tracing=dynamic_tracing,
        verbose=False)


    '''
    Define central functions for optimization
    '''

    # rebind G_obs with a leading batch dimension (1, n_days * n_age), the
    # shape the MC objective below compares simulator outputs against
    G_obs = torch.tensor(new_cases).reshape(1, n_days * n_age)
    
    def composite_squared_loss(G):
        '''
        Objective function
        Note: in BO, objectives are maximized
        '''
        return - (G - G_obs).pow(2).sum(dim=-1)

    # select objective
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes case difference of predictions and ground truth at t=T
        '''
        return  preds.reshape(n_days, n_age)[-1].sum() - torch.tensor(new_cases)[-1].sum()

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize settings based which parameters are calibrated
        kwargs = copy.deepcopy(launch_kwargs)
        if args.measures_optimized:

            '''
            Measures are calibrated
            '''

            measure_params = parr_to_pdict(params, measures_optimized=args.measures_optimized)

            # social distancing measures: calibration is only done for `SocialDistancingForAllMeasure` for now
            measure_list_ = [
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
                SocialDistancingForAllMeasure(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    p_stay_home=measure_params['p_stay_home']),
            ]
            
            # close sites if specified
            if args.measures_close:
                beta_multipliers = {'education': 1.0, 'social': 1.0,
                                'bus_stop': 1.0, 'office': 1.0, 'supermarket': 1.0}
                for category in args.measures_close:
                    if category in beta_multipliers.keys():
                        beta_multipliers[category] = 0.0
                    else:
                        raise ValueError(f'Site type `{category}` passed in `--measures_close` is invalid.\n'
                                         f'Available are {str(list(beta_multipliers.keys()))}')
                
                measure_list_.append(BetaMultiplierMeasureByType(
                    t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                      TO_HOURS * days_until_lockdown_end),
                    beta_multiplier=beta_multipliers
                ))
            
            kwargs['measure_list'] = MeasureList(measure_list_)

            # get optimized model paramters for this country and area
            calibrated_model_params = settings_optimized_town_params[args.country][args.area]
            if calibrated_model_params is None:
                raise ValueError(f'Cannot optimize measures for {args.country}-{args.area} because model parameters ' 
                                  'have not been fitted yet. Set values in `calibration_settings.py`')
            kwargs['params'] = calibrated_model_params

        else:

            '''
            Model parameters calibrated
            '''
            
            kwargs['measure_list'] = MeasureList([
                SocialDistancingForPositiveMeasure(
                    t_window=Interval(0.0, max_time), p_stay_home=1.0),
                SocialDistancingForPositiveMeasureHousehold(
                    t_window=Interval(0.0, max_time), p_isolate=1.0),
            ])

            kwargs['params'] = parr_to_pdict(params, measures_optimized=args.measures_optimized)


        # run simulation in parallel,
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS # account for test lag

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')

        # compute mean and standard error of means        
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero for non-degerateness
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        G = G.reshape(1, n_days * n_age)
        G_sem = G_sem.reshape(1, n_days * n_age)

        return G, G_sem


    def generate_initial_observations(n, logger):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

        # sobol sequence
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # simulator observations
        # new_G, new_G_sem: [n, n_days * n_age] (flattened outputs)
        new_G = torch.zeros((n, n_days * n_age), dtype=torch.float)
        new_G_sem = torch.zeros((n, n_days * n_age), dtype=torch.float)

        for i in range(n):

            t0 = time.time()

            # get mean and standard error of mean (sem) of every simulation output
            G, G_sem = composite_simulation(new_thetas[i, :])
            new_G[i, :] = G
            new_G_sem[i, :] = G_sem

            # log
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()
            # NOTE: this local `case_diff` shadows the outer case_diff helper
            # within this function's scope
            case_diff = (
                G.reshape(n_days, n_age)[-1].sum()
                - G_obs.reshape(n_days, n_age)[-1].sum())

            t1 = time.time()
            logger.log(
                i=i - n,  # negative index marks the initialization phase in the log
                time=t1 - t0,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename + '_init')

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Defines a GP given X, Y, and noise observations (standard error of mean)
        """
        
        train_ynoise = train_y_sem.pow(2.0) # noise is in variance units
        
        # standardize outputs to zero mean, unit variance to have good hyperparameter tuning
        model = FixedNoiseGP(train_x, train_y, train_ynoise, outcome_transform=Standardize(m=n_days * n_age))

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(model.likelihood, model)

        return mll, model

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults:  num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """

        batch_initial_conditions = gen_one_shot_kg_initial_conditions(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # optimize acquisition function
        candidates, _ = optimize_acqf(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for intialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
            batch_initial_conditions=batch_initial_conditions
        )

        # proposed evaluation
        new_theta = candidates.detach()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta.squeeze())

        return new_theta, new_G, new_G_sem

    # return functions
    return (
        objective, 
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
示例#5
0
def make_bayes_opt_functions(args): 
    '''
    Generates and returns functions used to run Bayesian optimization
    Argument:
        args:                   Keyword arguments specifying exact settings for optimization

    Returns:
        objective :                         objective maximized for BO
        generate_initial_observations :     function to generate initial observations
        initialize_model :                  function to initialize GP
        optimize_acqf_and_get_observation : function to optimize acquisition function based on model
        case_diff :                         computes case difference between prediction array and ground truth at t=T
        unnormalize_theta :                 converts BO params to simulation params (unit cube to real parameters)
        header :                            header lines to be printed to log file

    '''
    header = []

    # set parameter bounds based on calibration mode (single beta vs multiple beta)
    multi_beta_calibration = args.multi_beta_calibration
    if multi_beta_calibration:
        param_bounds = calibration_model_param_bounds_multi
    else:
        param_bounds = calibration_model_param_bounds_single
        
    # remember line executed
    header.append('=' * 100)
    header.append(datetime.now().strftime("%d/%m/%Y %H:%M:%S"))
    header.append('python ' + ' '.join(sys.argv))
    header.append('=' * 100)

    data_country = args.country
    data_area = args.area
    mob_settings = args.mob or calibration_mob_paths[data_country][data_area][0] # 0: downscaled, 1: full scale 

    # initialize mobility object to obtain information (no trace generation yet)
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)
    mob = MobilitySimulator(**mob_kwargs)
    
    # data settings
    verbose = not args.not_verbose
    use_households = not args.no_households
    data_start_date = args.start or calibration_start_dates[data_country][data_area]
    data_end_date = args.end or calibration_lockdown_dates[args.country]['end']
    per_age_group_objective = args.per_age_group_objective

    # simulation settings
    n_init_samples = args.ninit
    n_iterations = args.niters
    simulation_roll_outs = args.rollouts
    cpu_count = args.cpu_count
    lazy_contacts = not args.no_lazy_contacts
    load_observations = args.load

    # set testing parameters
    testing_params = calibration_testing_params

    # BO acquisition function optimization (Knowledge gradient)
    acqf_opt_num_fantasies = args.acqf_opt_num_fantasies
    acqf_opt_num_restarts = args.acqf_opt_num_restarts
    acqf_opt_raw_samples = args.acqf_opt_raw_samples
    acqf_opt_batch_limit = args.acqf_opt_batch_limit
    acqf_opt_maxiter = args.acqf_opt_maxiter

    """
    Bayesian optimization pipeline
    """

    # Import Covid19 data
    # Shape (max_days, num_age_groups)
    unscaled_area_cases = collect_data_from_df(country=data_country, area=data_area, datatype='new',
                                               start_date_string=data_start_date, end_date_string=data_end_date)
    assert(len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)

    # Generate initial seeds based on unscaled case numbers in town
    initial_seeds = gen_initial_seeds(
        sim_cases, day=0)

    if sum(initial_seeds.values()) == 0:
        print('No states seeded at start time; cannot start simulation.\n'
              'Consider setting a later start date for calibration using the "--start" flag.')
        exit(0)

    num_age_groups = sim_cases.shape[1]
    header.append('Downsampling :                    {}'.format(mob.downsample))
    header.append('Simulation population:            {}'.format(mob.num_people))
    header.append('Simulation population (unscaled): {}'.format(mob.num_people_unscaled))
    header.append('Area population :                 {}'.format(mob.region_population))
    header.append('Initial seed counts :             {}'.format(initial_seeds))

    scaled_test_capacity = get_test_capacity(
        country=data_country, area=data_area, 
        mob_settings=mob_kwargs, end_date_string=data_end_date)

    testing_params['tests_per_batch'] = scaled_test_capacity

    test_lag_days = int(testing_params['test_reporting_lag'] / TO_HOURS)
    assert(int(testing_params['test_reporting_lag']) % 24 == 0)

    # Maximum time fixed by real data, init mobility simulator simulation
    # maximum time to simulate, in hours
    max_time = int(sim_cases.shape[0] * TO_HOURS)
    max_time += TO_HOURS * test_lag_days  # simulate longer due to test lag in simulations
    testing_params['testing_t_window'] = [0.0, max_time]
    mob.simulate(max_time=max_time, lazy_contacts=True)

    header.append(
        'Target cases per age group at t=0:   {} {}'.format(sim_cases[0].sum().item(), list(sim_cases[0].tolist())))
    header.append(
        'Target cases per age group at t=T:   {} {}'.format(sim_cases[-1].sum().item(), list(sim_cases[-1].tolist())))
    header.append(
        'Daily test capacity in sim.:         {}'.format(testing_params['tests_per_batch']))

    # instantiate correct distributions
    distributions = CovidDistributions(country=args.country)

    # set Bayesian optimization target as positive cases
    n_days, n_age = sim_cases.shape
    
    sim_bounds = pdict_to_parr(
        pdict=param_bounds, 
        multi_beta_calibration=multi_beta_calibration
    ).T

    n_params = sim_bounds.shape[1]

    header.append(f'Parameters : {n_params}')
    header.append('Parameter bounds: {}'.format(parr_to_pdict(parr=sim_bounds.T, multi_beta_calibration=multi_beta_calibration)))

    # extract lockdown period
    sim_start_date = pd.to_datetime(data_start_date)
    sim_end_date = sim_start_date + timedelta(days=int(max_time / TO_HOURS))

    lockdown_start_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['start'])
    lockdown_end_date = pd.to_datetime(
        calibration_lockdown_dates[args.country]['end'])

    days_until_lockdown_start = (lockdown_start_date - sim_start_date).days
    days_until_lockdown_end = (lockdown_end_date - sim_start_date).days

    header.append(f'Simulation starts at : {sim_start_date}')
    header.append(f'             ends at : {sim_end_date}')
    header.append(f'Lockdown   starts at : {lockdown_start_date}')
    header.append(f'             ends at : {lockdown_end_date}')
    header.append(f'Cases compared until : {pd.to_datetime(data_end_date)}')
    header.append(f'            for days : {sim_cases.shape[0]}')
    
    # create settings dictionary for simulations
    launch_kwargs = dict(
        mob_settings=mob_settings,
        distributions=distributions,
        random_repeats=simulation_roll_outs,
        cpu_count=cpu_count,
        initial_seeds=initial_seeds,
        testing_params=testing_params,
        max_time=max_time,
        num_people=mob.num_people,
        num_sites=mob.num_sites,
        home_loc=mob.home_loc,
        site_loc=mob.site_loc,
        lazy_contacts=lazy_contacts,
        verbose=False)


    '''
    Define central functions for optimization
    '''

    G_obs = torch.tensor(sim_cases).reshape(1, n_days * n_age)
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    '''
    Objective function
    Note: in BO and botorch, objectives are maximized
    '''
    if per_age_group_objective:
        def composite_squared_loss(G):
            return - (G - G_obs).pow(2).sum(dim=-1) / n_days

    else:
        def composite_squared_loss(G):
            return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days


    # select objective function
    objective = GenericMCObjective(composite_squared_loss)

    def case_diff(preds):
        '''
        Computes aggregate case difference of predictions and ground truth at t=T
        '''
        if per_age_group_objective:
            return preds[-1].sum(dim=-1) - G_obs_aggregate[-1]
        else:
            return preds[-1] - G_obs_aggregate[-1]

    def unnormalize_theta(theta):
        '''
        Computes unnormalized parameters
        '''
        return transforms.unnormalize(theta, sim_bounds)

    def composite_simulation(norm_params):
        """
        Takes a set of normalized (unit cube) BO parameters
        and returns simulator output means and standard errors based on multiple
        random restarts. This corresponds to the black-box function.
        """

        # un-normalize normalized params to obtain simulation parameters
        params = transforms.unnormalize(norm_params, sim_bounds)

        # finalize model parameters based on given parameters and calibration mode
        kwargs = copy.deepcopy(launch_kwargs)        
        all_params = parr_to_pdict(parr=params, multi_beta_calibration=multi_beta_calibration)

        if multi_beta_calibration:
            betas = all_params['betas']
        else:
            betas = {
                'education': all_params['beta_site'],
                'social': all_params['beta_site'],
                'bus_stop': all_params['beta_site'],
                'office': all_params['beta_site'],
                'supermarket': all_params['beta_site'],
            }

        model_params = {
            'betas' : betas,
            'beta_household' : all_params['beta_household'],
        }

        # set exposure parameters
        kwargs['params'] = model_params

        # set measure parameters
        kwargs['measure_list'] = MeasureList([
            # standard behavior of positively tested: full isolation
            SocialDistancingForPositiveMeasure(
                t_window=Interval(0.0, max_time), p_stay_home=1.0),
            SocialDistancingForPositiveMeasureHousehold(
                t_window=Interval(0.0, max_time), p_isolate=1.0),

            # social distancing factor during lockdown: calibrated
            SocialDistancingForAllMeasure(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                p_stay_home=all_params['p_stay_home']),

            # site specific measures: fixed in advance, outside of calibration
            BetaMultiplierMeasureByType(
                t_window=Interval(TO_HOURS * days_until_lockdown_start,
                                  TO_HOURS * days_until_lockdown_end),
                beta_multiplier=calibration_lockdown_beta_multipliers)
        ])

        # run simulation in parallel,
        summary = launch_parallel_simulations(**kwargs)

        # (random_repeats, n_people)
        posi_started = torch.tensor(summary.state_started_at['posi'])
        posi_started -= test_lag_days * TO_HOURS # account for test lag in objective computation

        # (random_repeats, n_days)
        age_groups = torch.tensor(summary.people_age)

        # (random_repeats, n_days, n_age_groups)
        posi_cumulative = convert_timings_to_cumulative_daily(
            timings=posi_started, age_groups=age_groups, time_horizon=n_days * TO_HOURS)

        if posi_cumulative.shape[0] <= 1:
            raise ValueError('Must run at least 2 random restarts per setting to get estimate of noise in observation.')
        
        # compute aggregate if not using objective per age-group
        if not per_age_group_objective:
            posi_cumulative = posi_cumulative.sum(dim=-1)

        # compute mean and standard error of means        
        G = torch.mean(posi_cumulative, dim=0)
        G_sem = torch.std(posi_cumulative, dim=0) / math.sqrt(posi_cumulative.shape[0])

        # make sure noise is not zero for non-degenerateness
        G_sem = torch.max(G_sem, MIN_NOISE)

        # flatten
        if per_age_group_objective:
            G = G.reshape(n_days * n_age)
            G_sem = G_sem.reshape(n_days * n_age)

        return G, G_sem

    def generate_initial_observations(n, logger, loaded_init_theta=None, loaded_init_G=None, loaded_init_G_sem=None):
        """
        Takes an integer `n` and generates `n` initial observations
        from the black box function using Sobol random parameter settings
        in the unit cube. Returns parameter setting and black box function outputs.
        If `loaded_init_theta/G/G_sem` are specified, initialization is loaded (possibly partially, in which
        case the initialization using the Sobol random sequence is continued where left off).

        Args:
            n: number of quasi-random initialization evaluations
            logger: logger object with a `.log(...)` method and a `.filename` attribute
            loaded_init_theta: previously saved normalized parameter settings, or None
            loaded_init_G: previously saved simulator outputs, or None
            loaded_init_G_sem: previously saved output standard errors, or None

        Returns:
            (new_thetas, new_G, new_G_sem, best_f, best_f_idx)
        """

        if n <= 0:
            raise ValueError(
                'qKnowledgeGradient and GP needs at least one observation to be defined properly.')

        # sobol sequence proposal points
        # new_thetas: [n, n_params]
        new_thetas = torch.tensor(
            sobol_seq.i4_sobol_generate(n_params, n), dtype=torch.float)

        # check whether initial observations are loaded
        loaded = (loaded_init_theta is not None
              and loaded_init_G is not None
              and loaded_init_G_sem is not None)
        if loaded:
            n_loaded = loaded_init_theta.shape[0] # loaded no. of observations total
            n_loaded_init = min(n_loaded, n)      # loaded no. of quasi-random initialization observations
            n_init = max(n_loaded, n)             # final no. of observations returned, at least quasi-random initializations

            # check whether loaded proposal points are same as without loading observations.
            # BUGFIX: this was an `assert` wrapped in try/except AssertionError; `assert`
            # statements are stripped under `python -O`, which would silently disable
            # this consistency warning. An explicit check always runs.
            if not np.allclose(loaded_init_theta[:n_loaded_init], new_thetas[:n_loaded_init]):
                print(
                    '\n\n\n===> Warning: parameters of loaded inital observations '
                    'do not coincide with initialization that would have been done. '
                    'Double check simulation, ninit, and parameter bounds, which could change '
                    'the initial random Sobol sequence. \nThe loaded parameter settings are used. \n\n\n'
                )

            if n_init > n:
                new_thetas = loaded_init_theta # size of tensor increased to `n_init`, as more than Sobol init points loaded

        else:
            n_loaded = 0       # loaded no. of observations total
            n_loaded_init = 0  # loaded no. of quasi-random initialization observations
            n_init = n         # final no. of observations returned, at least quasi-random initializations

        # instantiate simulator observation tensors
        if per_age_group_objective:
            # new_G, new_G_sem: [n_init, n_days * n_age] (flattened outputs)
            new_G = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days * n_age), dtype=torch.float)
        else:
            # new_G, new_G_sem: [n_init, n_days]
            new_G = torch.zeros((n_init, n_days), dtype=torch.float)
            new_G_sem = torch.zeros((n_init, n_days), dtype=torch.float)

        # generate `n` initial evaluations at quasi random settings; if applicable, skip and load expensive evaluation result
        for i in range(n_init):

            # if loaded, use initial observation for this parameter settings
            if loaded and i <= n_loaded - 1:
                new_thetas[i] = loaded_init_theta[i]
                G, G_sem = loaded_init_G[i], loaded_init_G_sem[i]
                walltime = 0.0

            # if not loaded, evaluate as usual
            else:
                t0 = time.time()
                G, G_sem = composite_simulation(new_thetas[i])
                walltime = time.time() - t0

            new_G[i] = G
            new_G_sem[i] = G_sem

            # log: best and current objective over all evaluations so far
            G_objectives = objective(new_G[:i+1])
            best_idx = G_objectives.argmax()
            best = G_objectives[best_idx].item()
            current = objective(G).item()

            # difference between simulated and observed cumulative cases on the last day
            if per_age_group_objective:
                case_diff = G.reshape(n_days, n_age)[-1].sum() - G_obs_aggregate[-1]
            else:
                case_diff = G[-1] - G_obs_aggregate[-1]

            logger.log(
                i=i - n,
                time=walltime,
                best=best,
                objective=current,
                case_diff=case_diff,
                theta=transforms.unnormalize(new_thetas[i, :].detach().squeeze(), sim_bounds)
            )

            # save state after every evaluation so long runs can be resumed
            state = {
                'train_theta': new_thetas[:i+1],
                'train_G': new_G[:i+1],
                'train_G_sem': new_G_sem[:i+1],
                'best_observed_obj': best,
                'best_observed_idx': best_idx,
            }
            save_state(state, logger.filename)

        # compute best objective from simulations
        f = objective(new_G)
        best_f_idx = f.argmax()
        best_f = f[best_f_idx].item()

        return new_thetas, new_G, new_G_sem, best_f, best_f_idx

    def initialize_model(train_x, train_y, train_y_sem):
        """
        Builds a fixed-noise GP over the given observations together with its
        exact marginal log likelihood training objective.

        `train_y_sem` holds the standard error of the mean for each output,
        so it is squared to obtain observation noise in variance units.

        Returns:
            (mll, model) — the marginal log likelihood and the GP model
        """

        # noise passed to FixedNoiseGP must be a variance, not an SEM
        noise_var = train_y_sem ** 2.0

        # standardize outputs (zero mean, unit variance) so GP hyperparameter
        # tuning behaves well regardless of the case-count scale
        out_dim = n_days * n_age if per_age_group_objective else n_days
        gp = FixedNoiseGP(train_x, train_y, noise_var,
                          outcome_transform=Standardize(m=out_dim))

        # "Loss" for GPs - the marginal log likelihood
        mll = ExactMarginalLogLikelihood(gp.likelihood, gp)

        return mll, gp

    # Model initialization
    # parameters used in BO are always in unit cube for optimal hyperparameter tuning of GPs
    # bo_bounds: [2, n_params] tensor; row 0 = lower bounds (all 0), row 1 = upper bounds (all 1)
    bo_bounds = torch.stack([torch.zeros(n_params), torch.ones(n_params)])

    def optimize_acqf_and_get_observation(acq_func, args):
        """
        Optimizes the acquisition function, and returns a new candidate and a noisy observation.
        botorch defaults:  num_restarts=10, raw_samples=256, batch_limit=5, maxiter=200
        """

        # settings shared between the initialization heuristic and the optimizer
        shared_kwargs = dict(
            acq_function=acq_func,
            bounds=bo_bounds,
            q=1,
            num_restarts=args.acqf_opt_num_restarts,
            raw_samples=args.acqf_opt_raw_samples,  # used for intialization heuristic
            options={"batch_limit": args.acqf_opt_batch_limit,
                     "maxiter": args.acqf_opt_maxiter},
        )

        # heuristic starting points tailored to one-shot knowledge gradient
        init_conditions = gen_one_shot_kg_initial_conditions(**shared_kwargs)

        # optimize acquisition function from those starting points
        candidates, _ = optimize_acqf(
            batch_initial_conditions=init_conditions,
            **shared_kwargs,
        )

        # proposed parameter setting (still normalized to the unit cube)
        new_theta = candidates.detach().squeeze()

        # observe new noisy function evaluation
        new_G, new_G_sem = composite_simulation(new_theta)

        return new_theta, new_G, new_G_sem

    # return functions
    # hand the BO building blocks back to the caller; NOTE(review): `case_diff`,
    # `unnormalize_theta`, and `header` are defined earlier in the enclosing
    # function (above this excerpt)
    return (
        objective, 
        generate_initial_observations,
        initialize_model,
        optimize_acqf_and_get_observation,
        case_diff,
        unnormalize_theta,
        header,
    )
示例#6
0
def get_unique_calibration_params(*, country, area, multi_beta_calibration, maxiters=None):
    """
    Returns all unique parameter settings that ** improved ** the objective
    during calibration for a `country` and an `area`.

    Args:
        country: country key into the calibration settings dictionaries
        area: area key within `country`
        multi_beta_calibration: whether per-site-type betas were calibrated
        maxiters: if given (and truthy), only the first `maxiters` calibration
            iterations are considered

    Returns:
        list of `(iteration, param_dict)` tuples, one for every iteration whose
        objective strictly improved on all previous iterations
    """

    # parameter bounds used during calibration (needed to unnormalize thetas)
    param_bounds = (
        calibration_model_param_bounds_multi
        if multi_beta_calibration else
        calibration_model_param_bounds_single)
    sim_bounds = pdict_to_parr(
        pdict=param_bounds,
        multi_beta_calibration=multi_beta_calibration
    ).T

    # load calibration state: normalized parameter settings and simulated outputs
    state = load_state(calibration_states[country][area])
    train_theta = state['train_theta']
    train_G = state['train_G']

    # NOTE: an unused `MobilitySimulator(**mob_kwargs)` instance and an unused
    # `G_obs` tensor were removed here; only `mob_kwargs` is needed (for downsampling)
    mob_settings = calibration_mob_paths[country][area][0]
    with open(mob_settings, 'rb') as fp:
        mob_kwargs = pickle.load(fp)

    data_start_date = calibration_start_dates[country][area]
    data_end_date = calibration_lockdown_dates[country]['end']

    unscaled_area_cases = collect_data_from_df(country=country, area=area, datatype='new',
                                            start_date_string=data_start_date, end_date_string=data_end_date)
    assert (len(unscaled_area_cases.shape) == 2)

    # Scale down cases based on number of people in town and region
    sim_cases = downsample_cases(unscaled_area_cases, mob_kwargs)
    n_days, n_age = sim_cases.shape

    # observed daily cases aggregated over age groups: [n_days]
    G_obs_aggregate = torch.tensor(sim_cases).sum(dim=-1)

    def objective(G):
        # negative mean squared error w.r.t. the observed aggregate case counts
        return - (G - G_obs_aggregate).pow(2).sum(dim=-1) / n_days

    # if maxiters provided, select submatrix of state
    if maxiters:
        train_theta = train_theta[:min(maxiters, train_theta.shape[0])]
        train_G = train_G[:min(maxiters, train_G.shape[0])]

    # extract all parameter settings that strictly improved the objective;
    # -inf replaces the previous magic sentinel -99999999999999
    best = -math.inf
    all_params = []

    for t, (theta, G) in enumerate(zip(train_theta, train_G)):
        obj = objective(G).item()

        if obj > best:
            best = obj
            calibrated_params = transforms.unnormalize(theta, sim_bounds)
            all_params.append(
                (t, parr_to_pdict(parr=calibrated_params, multi_beta_calibration=multi_beta_calibration)))

    return all_params
示例#7
0
def compute_mob_statistics(loc_tup, days, max_people, verbose=False):
    '''Computes all MobilitySimulator statistics for given `country` and `area`.

    Args:
        loc_tup: `(country, area)` tuple keying into `calibration_mob_paths`
        days: simulation horizon in days (converted to hours via TO_HOURS)
        max_people: forwarded to `get_stats`
        verbose: print progress and make the simulators verbose

    Returns:
        dict mapping 'ratio-<stat>-<series>' to the downsampled/full comparison
        values computed by `comp_stats`

    Side effects: saves one comparison histogram PNG per statistic under `plots/`.
    '''

    country, area = loc_tup

    if verbose:
        print(country, area)

    # get mobility simulator settings for both population versions
    statistics = dict()
    mob_settings_downsampled, mob_settings_full = calibration_mob_paths[
        country][area]

    def _simulate(settings_path):
        # load pickled simulator settings and run the mobility simulation
        with open(settings_path, 'rb') as fp:
            settings = pickle.load(fp)
        m = MobilitySimulator(**settings)
        m.verbose = verbose
        m.simulate(max_time=days * TO_HOURS, lazy_contacts=True)
        return m

    # compute contact information one simulator at a time and free each
    # immediately, keeping only one large simulator in memory at once
    mob_downsampled = _simulate(mob_settings_downsampled)
    contact_info_downsampled = get_stats(mob_downsampled,
                                         max_people,
                                         verbose=verbose)
    del mob_downsampled

    mob_full = _simulate(mob_settings_full)
    contact_info_full = get_stats(mob_full, max_people, verbose=verbose)
    del mob_full

    # summarize: plot histograms side by side and record comparison ratios
    for s in contact_info_downsampled:

        fig = plt.figure(figsize=(4, 7))
        ax0 = fig.add_subplot(211)
        ax0.hist(contact_info_downsampled[s])
        ax0.set_title('downsampled')
        ax1 = fig.add_subplot(212)
        ax1.hist(contact_info_full[s])
        ax1.set_title('full')

        # use a common x-range covering both histograms
        xlim0, xlim1 = ax0.get_xlim(), ax1.get_xlim()
        shared_xlim = (min(xlim0[0], xlim1[0]), max(xlim0[1], xlim1[1]))
        ax0.set_xlim(shared_xlim)
        ax1.set_xlim(shared_xlim)
        fig.suptitle(s)
        plt.savefig(f'plots/betaScaling-{country}-{area}-{s}.png',
                    format='png',
                    facecolor=None,
                    dpi=200,
                    bbox_inches='tight')
        plt.close('all')

        d = comp_stats(contact_info_downsampled[s], contact_info_full[s])
        for k, v in d.items():
            statistics['ratio-' + k + '-' + s] = v

    # print always
    print(country, area)
    pprint(statistics)

    return statistics