def test_generate_population_initial_age(age_bounds_mock, initial_age_mock):
    """Check the call made when ``age_start`` and ``age_end`` are equal.

    ``bp.generate_population`` is invoked with both age bounds set to 0.
    The ``initial_age_mock`` fixture must then have been called exactly once,
    with the simulants, the population data, the starting age (as a float),
    the step size and the randomness streams as its first five positional
    arguments.  The ``age_bounds_mock`` fixture must not have been called.
    """
    def _noop_register(*args, **kwargs):
        # Stand-in for the simulant-registration callback; does nothing.
        return None

    stream_names = ('general_purpose', 'bin_selection', 'age_smoothing')
    creation_time = pd.Timestamp(1990, 7, 2)
    step_size = pd.Timedelta(days=1)
    age_params = {'age_start': 0, 'age_end': 0}

    uniform_pop = make_uniform_pop_data(age_bin_midpoint=True)
    pop_data = dt.assign_demographic_proportions(uniform_pop)
    streams = {name: get_randomness() for name in stream_names}
    sims = make_base_simulants()

    bp.generate_population(
        sims.index,
        creation_time,
        step_size,
        age_params,
        pop_data,
        streams,
        _noop_register,
    )

    initial_age_mock.assert_called_once()
    positional = initial_age_mock.call_args[0]
    assert positional[0].equals(sims)
    assert positional[1].equals(pop_data)
    assert positional[2] == float(age_params['age_start'])
    assert positional[3] == step_size
    assert positional[4] == streams

    age_bounds_mock.assert_not_called()
def test__assign_demography_with_initial_age_zero(config):
    """Exercise ``bp._assign_demography_with_initial_age`` with an initial age of 0.

    Asserts that every simulant gets a distinct age, that all ages lie
    strictly between the initial age and the initial age plus one step size
    (in years), and that sex and location are split evenly to within an
    absolute tolerance of 0.01.
    """
    stream_names = ('general_purpose', 'bin_selection', 'age_smoothing')
    initial_age = 0

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]
    base_simulants = make_base_simulants()
    streams = {name: get_randomness() for name in stream_names}
    step_size = utilities.to_time_delta(config.time.step_size)

    simulants = bp._assign_demography_with_initial_age(
        base_simulants,
        pop_data,
        initial_age,
        step_size,
        streams,
        lambda *args, **kwargs: None,
    )

    # Ages must all be distinct.
    assert len(simulants) == len(simulants.age.unique())
    # Ages must fall strictly inside the first time step.
    assert simulants.age.min() > initial_age
    assert simulants.age.max() < initial_age + utilities.to_years(step_size)

    males = simulants[simulants.sex == 'Male']
    assert math.isclose(len(males) / len(simulants), 0.5, abs_tol=0.01)

    for location in simulants.location.unique():
        at_location = simulants[simulants.location == location]
        assert math.isclose(len(at_location) / len(simulants),
                            1 / len(simulants.location.unique()),
                            abs_tol=0.01)
def test_BasePopulation(config, base_plugins, generate_population_mock):
    """Run ``bp.BasePopulation`` in an ``InteractiveContext`` with a mocked generator.

    The test checks that

    * the component's ``population_data`` equals the loaded population
      structure after ``dt.assign_demographic_proportions``;
    * ``generate_population_mock`` is called exactly once, with the expected
      ``creation_time``, ``age_params``, ``population_data`` and
      ``randomness_streams`` keyword arguments;
    * the simulation's population matches the mocked simulants column by
      column; and
    * after running for ``num_days`` days, ages have advanced by that
      duration to within half a day.
    """
    num_days = 600
    time_step = 100  # Days

    sims = make_full_simulants()
    start_population_size = len(sims)
    # The mocked generator returns every column except 'tracked'.
    generate_population_mock.return_value = sims.drop(columns=['tracked'])

    base_pop = bp.BasePopulation()
    overrides = {
        'population': {'population_size': start_population_size},
        'time': {'step_size': time_step},
    }
    config.update(overrides, layer='override')
    simulation = InteractiveContext(
        components=[base_pop],
        configuration=config,
        plugin_configuration=base_plugins,
    )
    time_start = simulation._clock.time

    pop_structure = simulation._data.load('population.structure')
    pop_structure['location'] = simulation.configuration.input_data.location
    uniform_pop = dt.assign_demographic_proportions(pop_structure)
    assert base_pop.population_data.equals(uniform_pop)

    age_params = {
        'age_start': config.population.age_start,
        'age_end': config.population.age_end,
    }
    sub_pop = bp.BasePopulation.select_sub_population_data(
        uniform_pop, time_start.year)

    generate_population_mock.assert_called_once()
    # Keyword arguments recorded for that single call.
    call_kwargs = generate_population_mock.call_args[1]
    expected_creation_time = time_start - simulation._clock.step_size
    assert call_kwargs['creation_time'] == expected_creation_time
    assert call_kwargs['age_params'] == age_params
    assert call_kwargs['population_data'].equals(sub_pop)
    assert call_kwargs['randomness_streams'] == base_pop.randomness

    pop = simulation.get_population()
    for column in pop:
        assert pop[column].equals(sims[column])

    # Expected ages are computed before the clock is advanced.
    final_ages = pop.age + num_days / utilities.DAYS_PER_YEAR

    simulation.run_for(duration=pd.Timedelta(days=num_days))

    pop = simulation.get_population()
    half_day_in_years = 0.5 / utilities.DAYS_PER_YEAR  # Within a half of a day.
    assert np.allclose(pop.age, final_ages, atol=half_day_in_years)
def test__get_bins_and_proportions_with_youngest_bin():
    """Check ``dt._get_bins_and_proportions`` for the 2.5 age group.

    Uses the 1990 / location 1 / Male slice of the uniform population data
    with ``AgeValues(current=2.5, young=0, old=7.5)``.  The returned endpoints
    must be 0 and 5, and the current, young and old proportions must each
    equal ``1 / len(pop_data) / bin_width`` exactly.
    """
    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    in_slice = ((pop_data.year_start == 1990)
                & (pop_data.location == 1)
                & (pop_data.sex == 'Male'))
    pop_data = pop_data[in_slice]

    age = dt.AgeValues(current=2.5, young=0, old=7.5)
    endpoints, proportions = dt._get_bins_and_proportions(pop_data, age)

    assert endpoints.left == 0
    assert endpoints.right == 5

    bin_width = endpoints.right - endpoints.left
    assert proportions.current == 1 / len(pop_data) / bin_width
    assert proportions.young == 1 / len(pop_data) / bin_width
    assert proportions.old == 1 / len(pop_data) / bin_width
def test_rescale_binned_proportions_full_range():
    """Check that rescaling over ages 0 to 100 keeps the original proportions.

    After ``dt.rescale_binned_proportions`` with ``age_start=0`` and
    ``age_end=100``, the rows whose age appears in the input data must carry
    the same 'P(sex, location, age| year)' values as the input.
    """
    proportion_column = 'P(sex, location, age| year)'

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]

    rescaled = dt.rescale_binned_proportions(pop_data, age_start=0, age_end=100)
    # Compare only the ages that exist in the unscaled data.
    original_ages = pop_data.age.unique()
    rescaled = rescaled[rescaled.age.isin(original_ages)]

    assert np.allclose(pop_data[proportion_column], rescaled[proportion_column])
def test_assign_demographic_proportions():
    """Check the three probability columns produced for uniform population data.

    Each column must be constant and equal to the number of distinct values of
    the conditioning variables divided by the number of rows.
    """
    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))

    num_rows = len(pop_data)
    num_years = len(pop_data.year_start.unique())

    expected_joint = num_years / num_rows
    assert np.allclose(pop_data['P(sex, location, age| year)'], expected_joint)

    expected_given_age = num_years * len(pop_data.age.unique()) / num_rows
    assert np.allclose(pop_data['P(sex, location | age, year)'],
                       expected_given_age)

    expected_age = (num_years
                    * len(pop_data.sex.unique())
                    * len(pop_data.location.unique())
                    / num_rows)
    assert np.allclose(pop_data['P(age | year, sex, location)'], expected_age)
def test__assign_demography_with_age_bounds_error():
    """Expect ``ValueError`` from ``bp._assign_demography_with_age_bounds`` for ages 110 to 120."""
    stream_names = ('general_purpose', 'bin_selection', 'age_smoothing')
    age_start = 110
    age_end = 120

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    simulants = make_base_simulants()
    streams = {name: get_randomness() for name in stream_names}

    with pytest.raises(ValueError):
        bp._assign_demography_with_age_bounds(
            simulants,
            pop_data,
            age_start,
            age_end,
            streams,
            lambda *args, **kwargs: None,
        )
def test_rescale_binned_proportions_age_bin_edges():
    """Check rescaling when ``age_start`` and ``age_end`` sit on bin boundaries.

    With bounds 5 and 10 the rescaled data must contain two more distinct
    ages than the input, must include age 7.5, and must hold the expected
    proportions: the base value repeated, followed by two zeros, for each of
    the two halves of the table.
    """
    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]

    # Test edge case where age_start/age_end fall on age bin boundaries.
    rescaled = dt.rescale_binned_proportions(pop_data, age_start=5, age_end=10)

    rescaled_ages = rescaled.age.unique()
    assert len(rescaled_ages) == len(pop_data.age.unique()) + 2
    assert 7.5 in rescaled.age.unique()

    base_proportion = 1 / len(pop_data)
    non_zero_count = len(rescaled) // 2 - 2
    half_table = [base_proportion] * non_zero_count + [0, 0]
    correct_data = half_table * 2
    assert np.allclose(rescaled['P(sex, location, age| year)'], correct_data)
def test__assign_demography_with_initial_age_error():
    """Expect ``ValueError`` from ``bp._assign_demography_with_initial_age`` for an initial age of 200."""
    stream_names = ('general_purpose', 'bin_selection', 'age_smoothing')
    initial_age = 200

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]
    simulants = make_base_simulants()
    streams = {name: get_randomness() for name in stream_names}
    step_size = pd.Timedelta(days=1)

    with pytest.raises(ValueError):
        bp._assign_demography_with_initial_age(
            simulants,
            pop_data,
            initial_age,
            step_size,
            streams,
            lambda *args, **kwargs: None,
        )
def test_rescale_binned_proportions_clipped_ends():
    """Check rescaling when ``age_start=2`` and ``age_end=7`` cut through bins.

    For every (sex, location) group the rescaled proportions must be the base
    proportion weighted by 7/5, 3/5, 2/5 and 8/5, followed by unweighted base
    proportions and a final zero.
    """
    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]

    # Number of (sex, location) groups in the table.
    scale = len(pop_data.location.unique()) * len(pop_data.sex.unique())
    rescaled = dt.rescale_binned_proportions(pop_data, age_start=2, age_end=7)

    base_p = 1 / len(pop_data)
    clipped = [base_p * numerator / 5 for numerator in (7, 3, 2, 8)]
    untouched = [base_p] * (len(rescaled) // scale - 5)
    p_scaled = clipped + untouched + [0]

    for _, sub_population in rescaled.groupby(['sex', 'location']):
        assert np.allclose(sub_population['P(sex, location, age| year)'],
                           p_scaled)
def test_smooth_ages():
    """Check that ``dt.smooth_ages`` spreads binned ages into near-unique values.

    Starts from 10000 simulants aged 22.5 and 10000 aged 52.5.  After
    smoothing, the number of distinct ages must match the number of simulants
    to within 1, and the mean age must stay close to 37.5.
    """
    per_bin = 10000

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]

    columns = {
        'age': [22.5] * per_bin + [52.5] * per_bin,
        'sex': ['Male', 'Female'] * per_bin,
        'location': [1, 2] * per_bin,
    }
    simulants = pd.DataFrame(columns)
    randomness = get_randomness()

    smoothed = dt.smooth_ages(simulants, pop_data, randomness)

    assert math.isclose(len(smoothed.age.unique()),
                        len(smoothed.index),
                        abs_tol=1)

    # Tolerance is 3*std_dev of the sample mean
    # NOTE(review): the frame holds 20000 rows while the denominator below is
    # 2000 -- confirm that this is intentional.
    mean_tolerance = 3 * math.sqrt(13.149778198**2 / 2000)
    assert math.isclose(smoothed.age.mean(), 37.5, abs_tol=mean_tolerance)
def test__assign_demography_with_age_bounds():
    """Exercise ``bp._assign_demography_with_age_bounds`` over ages 0 to 180.

    Asserts that sex and location are split evenly to within an absolute
    tolerance of 0.01, that the mean gap between consecutive sorted ages
    matches ``age_bin_width * num_bins / n`` to a relative tolerance of 1e-3,
    and that no gap is as large as 100 times that value.
    """
    stream_names = (
        'general_purpose',
        'bin_selection',
        'age_smoothing',
        'age_smoothing_age_bounds',
    )
    age_start, age_end = 0, 180

    pop_data = dt.assign_demographic_proportions(
        make_uniform_pop_data(age_bin_midpoint=True))
    pop_data = pop_data[pop_data.year_start == 1990]
    base_simulants = make_base_simulants()
    streams = {name: get_randomness(name) for name in stream_names}

    simulants = bp._assign_demography_with_age_bounds(
        base_simulants,
        pop_data,
        age_start,
        age_end,
        streams,
        lambda *args, **kwargs: None,
    )

    males = simulants[simulants.sex == 'Male']
    assert math.isclose(len(males) / len(simulants), 0.5, abs_tol=0.01)

    for location in simulants.location.unique():
        at_location = simulants[simulants.location == location]
        assert math.isclose(len(at_location) / len(simulants),
                            1 / len(simulants.location.unique()),
                            abs_tol=0.01)

    # Gaps between consecutive ages once sorted.
    sorted_ages = np.sort(simulants.age.values)
    age_deltas = np.diff(sorted_ages)

    age_bin_width = 5  # See `make_uniform_pop_data`
    num_bins = len(pop_data.age.unique())
    n = len(simulants)

    assert math.isclose(age_deltas.mean(),
                        age_bin_width * num_bins / n,
                        rel_tol=1e-3)
    # Make sure there are no big age gaps.
    assert age_deltas.max() < 100 * age_bin_width * num_bins / n
def _build_population_data_table(data):
    """Build a population distribution table over demographic characteristics.

    This is a thin wrapper: the result is whatever
    ``assign_demographic_proportions`` returns for ``data``.

    Parameters
    ----------
    data : pd.DataFrame
        Population structure data

    Returns
    -------
    pandas.DataFrame
        Table intended to carry the following columns (the exact labels are
        set by ``assign_demographic_proportions`` -- TODO confirm against it):
            'age' : Midpoint of the age group,
            'age_start' : Lower bound of the age group,
            'age_end' : Upper bound of the age group,
            'sex' : 'Male' or 'Female',
            'location' : location,
            'year' : Year,
            'population' : Total population estimate,
            'P(sex, location | age, year)' : Conditional probability of sex
            and location given age and year,
            'P(sex, location, age | year)' : Conditional probability of sex,
            location, and age given year,
            'P(age | year, sex, location)' : Conditional probability of age
            given year, sex, and location.
    """
    population_table = assign_demographic_proportions(data)
    return population_table