def test_age_out_simulants(config, base_plugins):
    """Check that simulants are untracked once they pass the exit age.

    The population is created with ``age_start == age_end == 4`` and an
    ``exit_age`` of 5, then run for 600 days in 100-day steps.  Afterwards
    every simulant must be untracked and must have an ``exit_time`` between
    300 and 400 days after the simulation start.
    """
    start_population_size = 10000
    num_days = 600
    time_step = 100  # Days

    population_overrides = {
        'population_size': start_population_size,
        'age_start': 4,
        'age_end': 4,
        'exit_age': 5,
    }
    config.update(
        {
            'population': population_overrides,
            'time': {'step_size': time_step},
        },
        layer='override')

    simulation = InteractiveContext(components=[bp.BasePopulation()],
                                    configuration=config,
                                    plugin_configuration=base_plugins)
    time_start = simulation._clock.time

    # Every simulant is expected to start with a distinct age.
    initial_pop = simulation.get_population()
    assert len(initial_pop) == len(initial_pop.age.unique())

    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    # Nobody should still be tracked at the end of the run.
    untracked = pop[~pop.tracked]
    assert len(pop) == len(untracked)

    # All exits must fall in the [300, 400] day window after the start.
    earliest_exit = time_start + pd.Timedelta(300, unit='D')
    latest_exit = time_start + pd.Timedelta(400, unit='D')
    exited_in_window = (pop.exit_time >= earliest_exit) & (pop.exit_time <= latest_exit)
    assert len(pop) == len(pop[exited_in_window])
def test_Mortality(config, base_plugins):
    """Check that the Mortality component kills at least one simulant.

    Mortality rates for a single location are written to the simulation's
    data store before setup; after a 365-day run, not every simulant may
    still have ``alive == 'alive'``.
    """
    num_days = 365

    simulation = InteractiveContext(components=[TestPopulation(), Mortality()],
                                    configuration=config,
                                    plugin_configuration=base_plugins,
                                    setup=False)

    # To save time, only look at the location existing in the test dataset.
    all_rates = pd.read_csv(config.path_to_mortality_file)
    mortality_rate_df = all_rates[all_rates['LAD.code'] == 'E08000032']
    asfr_data = transform_rate_table(mortality_rate_df,
                                     2011,
                                     2012,
                                     config.population.age_start,
                                     config.population.age_end)
    simulation._data.write("cause.all_causes.cause_specific_mortality_rate",
                           asfr_data)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    is_alive = pop['alive'] == 'alive'
    print('alive', len(pop[is_alive]))
    print('dead', len(pop[~is_alive]))

    assert not np.all(is_alive)
def test_BasePopulation(config, base_plugins, generate_population_mock):
    """Check how BasePopulation builds its population and ages it.

    The population generator is mocked to return a fixed set of simulants.
    The test verifies the population data loaded by the component, the
    keyword arguments passed to the generator, that the state table matches
    the mocked simulants, and that ages advance by the simulated duration.
    """
    num_days = 600
    time_step = 100  # Days

    # The mocked generator returns the full simulants minus 'tracked'.
    sims = make_full_simulants()
    generate_population_mock.return_value = sims.drop(columns=['tracked'])
    start_population_size = len(sims)

    base_pop = bp.BasePopulation()
    config.update(
        {
            'population': {'population_size': start_population_size},
            'time': {'step_size': time_step},
        },
        layer='override')
    simulation = InteractiveContext(components=[base_pop],
                                    configuration=config,
                                    plugin_configuration=base_plugins)
    time_start = simulation._clock.time

    # Rebuild the expected population data the same way the test data is loaded.
    pop_structure = simulation._data.load('population.structure')
    pop_structure['location'] = simulation.configuration.input_data.location
    uniform_pop = dt.assign_demographic_proportions(pop_structure)
    assert base_pop.population_data.equals(uniform_pop)

    expected_age_params = {
        'age_start': config.population.age_start,
        'age_end': config.population.age_end,
    }
    expected_sub_pop = bp.BasePopulation.select_sub_population_data(
        uniform_pop, time_start.year)
    expected_creation_time = time_start - simulation._clock.step_size

    generate_population_mock.assert_called_once()
    # Get a dictionary of the keyword arguments used in the call
    call_kwargs = generate_population_mock.call_args[1]
    assert call_kwargs['creation_time'] == expected_creation_time
    assert call_kwargs['age_params'] == expected_age_params
    assert call_kwargs['population_data'].equals(expected_sub_pop)
    assert call_kwargs['randomness_streams'] == base_pop.randomness

    # The state table must reproduce the mocked simulants column by column.
    pop = simulation.get_population()
    for column_name in pop:
        assert pop[column_name].equals(sims[column_name])

    # Expected ages are computed before the run, from the initial ages.
    elapsed_years = num_days / utilities.DAYS_PER_YEAR
    final_ages = pop.age + elapsed_years

    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    half_day_in_years = 0.5 / utilities.DAYS_PER_YEAR  # Within a half of a day.
    assert np.allclose(pop.age, final_ages, atol=half_day_in_years)
def test_FertilityCrudeBirthRate(config, base_plugins):
    """Check that FertilityCrudeBirthRate adds simulants without deaths.

    After a 100-day run with mocked crude birth rate data, every simulant
    must be alive and the population must be larger than the configured
    starting size.
    """
    pop_size = config.population.population_size
    num_days = 100

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityCrudeBirthRate()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False)
    # Mock the birth data before the components are set up.
    simulation._data.write("covariate.live_births_by_sex.estimate",
                           crude_birth_rate_data())

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    everyone_alive = np.all(pop.alive == 'alive')
    assert everyone_alive
    assert len(pop.age) > pop_size
def test_fertility_module(config, base_plugins):
    """Check that FertilityAgeSpecificRates creates children with valid mothers.

    Fertility rates for a single location are written to the data store.
    The state table must gain ``last_birth_time`` and ``parent_id`` columns,
    and after a three-year run the population must have grown, everyone must
    be alive, and each new simulant's parent must have a ``last_birth_time``
    at or after the simulation start.
    """
    start_population_size = config.population.population_size
    num_days = 365 * 3

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityAgeSpecificRates()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False)

    # To save time, only look at the location existing in the test dataset.
    all_rates = pd.read_csv(config.path_to_fertility_file)
    fertility_rate_df = all_rates[all_rates['LAD.code'] == 'E08000032']
    asfr_data = transform_rate_table(fertility_rate_df, 2011, 2012, 10, 50, [2])

    # Mock Fertility Data
    simulation._data.write("covariate.age_specific_fertility_rate.estimate",
                           asfr_data)

    simulation.setup()
    time_start = simulation._clock.time

    state_table_message = 'expect Fertility module to update state table.'
    initial_columns = simulation.get_population().columns
    assert 'last_birth_time' in initial_columns, state_table_message
    assert 'parent_id' in initial_columns, state_table_message

    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()
    print(pop)

    # No death in this model.
    assert np.all(pop.alive == 'alive'), 'expect all simulants to be alive'

    # TODO: Write a more rigorous test.
    assert len(pop.age) > start_population_size, 'expect new simulants'

    # Rows past the starting size are the simulants added during the run.
    for position in range(start_population_size, len(pop)):
        mother = pop.loc[pop.iloc[position].parent_id]
        assert mother.last_birth_time >= time_start, (
            'expect all children to have mothers who'
            ' gave birth after the simulation starts.')
def test_emigration(config, base_plugins):
    """Check that the Emigration component makes some simulants emigrate.

    Emigration rates for a single location are computed from the emigration
    and total population files and written to the data store.  After a
    ten-year run, not every simulant may still be ``'alive'`` and at least
    one must have ``emigrated == 'Yes'``.
    """
    num_days = 365 * 10
    lad_code = 'E08000032'

    simulation = InteractiveContext(components=[TestPopulation(), Emigration()],
                                    configuration=config,
                                    plugin_configuration=base_plugins,
                                    setup=False)

    # setup emigration rates
    df_emigration = pd.read_csv(config.path_to_emigration_file)
    df_total_population = pd.read_csv(config.path_to_total_population_file)

    # Only one location is used; the original filter OR-ed the same
    # comparison with itself, which is equivalent to a single comparison.
    df_emigration = df_emigration[df_emigration['LAD.code'] == lad_code]
    df_total_population = df_total_population[
        df_total_population['LAD'] == lad_code]

    asfr_data_emigration = compute_migration_rates(df_emigration,
                                                   df_total_population,
                                                   2011,
                                                   2012,
                                                   config.population.age_start,
                                                   config.population.age_end,
                                                   aggregate_over=75)

    # Mock emigration Data
    simulation._data.write("covariate.age_specific_migration_rate.estimate",
                           asfr_data_emigration)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    print('emigrated', len(pop[pop['alive'] == 'emigrated']))
    # NOTE(review): the assertion below uses 'Yes' for emigrants; confirm that
    # 'no_emigrated' is really the label used for simulants who stayed.
    print('remaining population', len(pop[pop['emigrated'] == 'no_emigrated']))

    assert not np.all(pop.alive == 'alive')
    assert len(pop[pop['emigrated'] == 'Yes']) > 0, 'expect migration'
def test_Immigration(config, base_plugins):
    """Check that the Immigration component adds simulants during the run.

    Immigration rates, the yearly total of immigrants and the immigration
    MSOA table are written to the data store.  After a 10-day run the
    population must contain more than one distinct ``entrance_time``.
    """
    num_days = 10

    simulation = InteractiveContext(
        components=[TestPopulation(), Immigration()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False)

    # total population, restricted to the test location
    total_population = pd.read_csv(config.path_to_total_population_file)
    total_population = total_population[total_population['LAD'] == 'E08000032']

    # setup immigration rates
    immigration = pd.read_csv(config.path_to_immigration_file)
    immigration = immigration[immigration['LAD.code'] == 'E08000032']
    asfr_data_immigration = compute_migration_rates(immigration,
                                                    total_population,
                                                    2011,
                                                    2012,
                                                    config.population.age_start,
                                                    config.population.age_end,
                                                    normalize=False)

    # immigration table read from config.path_to_immigration_MSOA
    df_immigration_MSOA = pd.read_csv(config.path_to_immigration_MSOA)

    # read total immigrants from the file (sum over columns from the 5th on)
    count_columns = immigration.columns[4:]
    total_immigrants = int(immigration[count_columns].sum().sum())

    simulation._data.write("cause.all_causes.cause_specific_immigration_rate",
                           asfr_data_immigration)
    simulation._data.write(
        "cause.all_causes.cause_specific_total_immigrants_per_year",
        total_immigrants)
    simulation._data.write("cause.all_causes.immigration_to_MSOA",
                           df_immigration_MSOA)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    entrance_time_counts = pop["entrance_time"].value_counts()
    assert len(entrance_time_counts) > 1
    print(pop)
def test_FertilityDeterministic(config):
    """Check that FertilityDeterministic adds the configured number of simulants.

    With 1000 new simulants per year and a 100-day run, the number of steps
    taken must equal ``num_days // step_size``, everyone must be alive, and
    the population growth must equal the pro-rated yearly number (truncated
    to an integer).
    """
    pop_size = config.population.population_size
    step_size = config.time.step_size
    annual_new_simulants = 1000
    num_days = 100

    fertility_overrides = {
        'fertility': {
            'number_of_new_simulants_each_year': annual_new_simulants
        }
    }
    config.update(fertility_overrides, **metadata(__file__))

    simulation = InteractiveContext(
        components=[TestPopulation(), FertilityDeterministic()],
        configuration=config)
    num_steps = simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    expected_steps = num_days // step_size
    assert num_steps == expected_steps

    assert np.all(pop.alive == 'alive')

    expected_new_simulants = int(num_days * annual_new_simulants /
                                 utilities.DAYS_PER_YEAR)
    actual_new_simulants = len(pop.age) - pop_size
    assert expected_new_simulants == actual_new_simulants
def test_internal_outmigration(config, base_plugins):
    """Check that InternalMigration moves some simulants and records it.

    Internal out-migration rates for a handful of locations, the MSOA/LAD
    location indices and the path to the OD matrices are written to the data
    store.  After a five-year run, not every simulant may have migrated, at
    least one out-migration time must be recorded, and at least one previous
    MSOA location must be stored.
    """
    num_days = 365 * 5

    simulation = InteractiveContext(
        components=[TestPopulation(), InternalMigration()],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False)

    df = pd.read_csv(config.path_to_internal_outmigration_file)

    # to save time, only look at locations existing on the test dataset.
    test_lad_codes = [
        'E08000032', 'E08000033', 'E08000034',
        'E06000024', 'E08000035', 'E07000163',
    ]
    df_internal_outmigration = df[df['LAD.code'].isin(test_lad_codes)]

    asfr_data = transform_rate_table(df_internal_outmigration,
                                     2011,
                                     2012,
                                     config.population.age_start,
                                     config.population.age_end)
    simulation._data.write("cause.age_specific_internal_outmigration_rate",
                           asfr_data)

    # Read MSOA ---> LAD
    msoa_lad_df = pd.read_csv(config.path_msoa_to_lad)

    # Read OD matrix, only destinations
    OD_matrix_dest = pd.read_csv(config.path_to_OD_matrix_index_file,
                                 index_col=0)
    OD_matrix_with_LAD = OD_matrix_dest.merge(
        msoa_lad_df[["MSOA11CD", "LAD16CD"]],
        left_index=True,
        right_on="MSOA11CD")
    OD_matrix_with_LAD.index = OD_matrix_with_LAD["indices"]

    # Create indices for MSOA and LAD
    MSOA_location_index = OD_matrix_with_LAD["MSOA11CD"].to_dict()
    LAD_location_index = OD_matrix_with_LAD["LAD16CD"].to_dict()

    # Register the location indices and the path to the OD matrices.
    simulation._data.write("internal_migration.MSOA_index",
                           MSOA_location_index)
    simulation._data.write("internal_migration.LAD_index",
                           LAD_location_index)
    simulation._data.write("internal_migration.MSOA_LAD_indices",
                           OD_matrix_with_LAD)
    simulation._data.write("internal_migration.path_to_OD_matrices",
                           config.path_to_OD_matrices)

    simulation.setup()
    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    print('internal outmigration',
          len(pop[pop['internal_outmigration'] == 'Yes']))
    print('remaining population',
          len(pop[pop['internal_outmigration'] == 'No']))

    assert not np.all(pop.internal_outmigration == 'Yes')

    # NaT never compares equal to anything, so `!= 'NaT'` is true for every
    # row of a datetime column and the check could not fail.  Coerce to
    # datetimes (this also maps a literal 'NaT' string to NaT) and look for
    # real timestamps instead.
    outmigration_times = pd.to_datetime(pop['last_outmigration_time'],
                                        errors='coerce')
    assert outmigration_times.notna().any(), \
        'time of out migration gets saved.'

    assert len(pop[pop['previous_MSOA_locations'] != '']) > 0, \
        'previous location of the migrant gets saved.'
def test_pipeline(config, base_plugins):
    """Run fertility, mortality, emigration and immigration together.

    Rate tables for each component are built for a single location and
    written to the data store before setup.  After a two-year run, not every
    simulant may still be ``'alive'``, at least one must have emigrated, the
    population must have grown, and every new simulant that is not an
    immigrant must have a parent whose ``last_birth_time`` is at or after the
    simulation start.
    """
    start_population_size = config.population.population_size
    num_days = 365 * 2
    lad_code = 'E08000032'
    age_start = config.population.age_start
    age_end = config.population.age_end

    simulation = InteractiveContext(
        components=[
            TestPopulation(),
            FertilityAgeSpecificRates(),
            Mortality(),
            Emigration(),
            Immigration(),
        ],
        configuration=config,
        plugin_configuration=base_plugins,
        setup=False)

    # setup mortality rates
    mortality = pd.read_csv(config.path_to_mortality_file)
    mortality_rate_df = mortality[mortality['LAD.code'] == lad_code]
    asfr_data = transform_rate_table(mortality_rate_df, 2011, 2012,
                                     age_start, age_end)
    simulation._data.write("cause.all_causes.cause_specific_mortality_rate",
                           asfr_data)

    # setup fertility rates
    fertility = pd.read_csv(config.path_to_fertility_file)
    fertility_rate_df = fertility[fertility['LAD.code'] == lad_code]
    asfr_data_fertility = transform_rate_table(fertility_rate_df, 2011, 2012,
                                               10, 50, [2])
    simulation._data.write("covariate.age_specific_fertility_rate.estimate",
                           asfr_data_fertility)

    # setup emigration rates
    emigration = pd.read_csv(config.path_to_emigration_file)
    total_population = pd.read_csv(config.path_to_total_population_file)
    emigration = emigration[emigration['LAD.code'] == lad_code]
    total_population = total_population[total_population['LAD'] == lad_code]
    asfr_data_emigration = compute_migration_rates(emigration,
                                                   total_population,
                                                   2011, 2012,
                                                   age_start, age_end)
    simulation._data.write("covariate.age_specific_migration_rate.estimate",
                           asfr_data_emigration)

    # setup immigration rates (reuses the filtered total population)
    immigration = pd.read_csv(config.path_to_immigration_file)
    immigration = immigration[immigration['LAD.code'] == lad_code]
    asfr_data_immigration = compute_migration_rates(immigration,
                                                    total_population,
                                                    2011, 2012,
                                                    age_start, age_end,
                                                    normalize=False)

    # read total immigrants from the file (sum over columns from the 5th on)
    count_columns = immigration.columns[4:]
    total_immigrants = int(immigration[count_columns].sum().sum())

    simulation._data.write("cause.all_causes.cause_specific_immigration_rate",
                           asfr_data_immigration)
    simulation._data.write(
        "cause.all_causes.cause_specific_total_immigrants_per_year",
        total_immigrants)

    df_immigration_MSOA = pd.read_csv(config.path_to_immigration_MSOA)
    simulation._data.write("cause.all_causes.immigration_to_MSOA",
                           df_immigration_MSOA)

    simulation.setup()
    time_start = simulation._clock.time

    state_table_message = 'expect Fertility module to update state table.'
    initial_columns = simulation.get_population().columns
    assert 'last_birth_time' in initial_columns, state_table_message
    assert 'parent_id' in initial_columns, state_table_message

    simulation.run_for(duration=pd.Timedelta(days=num_days))
    pop = simulation.get_population()

    for status in ('alive', 'dead', 'emigrated'):
        print(status, len(pop[pop['alive'] == status]))

    assert not np.all(pop.alive == 'alive')
    assert len(pop[pop['emigrated'] == 'Yes']) > 0, 'expect migration'
    assert len(pop.age) > start_population_size, 'expect new simulants'

    # Labels past the starting size belong to simulants added during the run.
    for simulant_id in range(start_population_size, len(pop)):
        newcomer = pop.loc[simulant_id]
        # skip immigrated population
        if newcomer.immigrated == 'Yes':
            continue
        mother = pop.loc[newcomer.parent_id]
        assert mother.last_birth_time >= time_start, (
            'expect all children to have mothers who'
            ' gave birth after the simulation starts.')
def _print_timestamp():
    """Print the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))


def RunPipeline(config, start_population_size):
    """Run the daedalus Microsimulation pipeline.

    The components named in ``config.components`` are instantiated, the rate
    tables and lookup data each of them needs are written to the simulation's
    data store, and the simulation is run one year (365.25 days) at a time
    for ``config.time.num_years`` years.  After every year the population is
    saved as a CSV file under
    ``<config.output_dir>/<config.location>/year_<n>/`` and summary counts
    are printed.

    Parameters
    ----------
    config : ConfigTree
        Config file to run the pipeline
    start_population_size : int
        Size of the starting population

    Returns
    -------
    A dataframe with the resulting simulation: the population after the
    last simulated year, as returned by ``utils.get_age_bucket``.
    """
    # Set up the components using the config.
    config.update({'population': {
        'population_size': start_population_size,
    }}, source=str(Path(__file__).resolve()))

    num_years = config.time.num_years

    # NOTE: eval() executes the component strings from the config as Python
    # code; only run this with configuration files from a trusted source.
    components = [eval(x) for x in config.components]

    simulation = InteractiveContext(components=components,
                                    configuration=config,
                                    plugin_configuration=utils.base_plugins(),
                                    setup=False)

    if 'InternalMigration()' in config.components:
        # setup internal migration matrices
        OD_matrices = InternalMigrationMatrix(configuration=config)
        OD_matrices.set_matrix_tables()
        simulation._data.write("internal_migration.MSOA_index",
                               OD_matrices.MSOA_location_index)
        simulation._data.write("internal_migration.LAD_index",
                               OD_matrices.LAD_location_index)
        simulation._data.write("internal_migration.MSOA_LAD_indices",
                               OD_matrices.df_OD_matrix_with_LAD)
        simulation._data.write("internal_migration.path_to_OD_matrices",
                               config.path_to_OD_matrices)

        # setup internal migration rates
        asfr_int_migration = InternalMigrationRateTable(configuration=config)
        asfr_int_migration.set_rate_table()
        simulation._data.write("cause.age_specific_internal_outmigration_rate",
                               asfr_int_migration.rate_table)

    if 'Mortality()' in config.components:
        # setup mortality rates
        asfr_mortality = MortalityRateTable(configuration=config)
        asfr_mortality.set_rate_table()
        simulation._data.write(
            "cause.all_causes.cause_specific_mortality_rate",
            asfr_mortality.rate_table)

    if 'FertilityAgeSpecificRates()' in config.components:
        # setup fertility rates
        asfr_fertility = FertilityRateTable(configuration=config)
        asfr_fertility.set_rate_table()
        simulation._data.write(
            "covariate.age_specific_fertility_rate.estimate",
            asfr_fertility.rate_table)

    if 'Emigration()' in config.components:
        # setup emigration rates
        asfr_emigration = EmigrationRateTable(configuration=config)
        asfr_emigration.set_rate_table()
        simulation._data.write(
            "covariate.age_specific_migration_rate.estimate",
            asfr_emigration.rate_table)

    if 'Immigration()' in config.components:
        # setup immigration rates
        asfr_immigration = ImmigrationRateTable(configuration=config)
        asfr_immigration.set_rate_table()
        asfr_immigration.set_total_immigrants()
        simulation._data.write("cause.all_causes.immigration_to_MSOA",
                               pd.read_csv(config.path_to_immigration_MSOA))
        simulation._data.write(
            "cause.all_causes.cause_specific_immigration_rate",
            asfr_immigration.rate_table)
        simulation._data.write(
            "cause.all_causes.cause_specific_total_immigrants_per_year",
            asfr_immigration.total_immigrants)

    print('Start simulation setup')
    _print_timestamp()
    simulation.setup()

    print('Start running simulation')
    _print_timestamp()

    # NOTE(review): if num_years < 1 the loop body never runs and `pop` is
    # unbound at the return statement below.
    for year in range(1, num_years + 1):
        simulation.run_for(duration=pd.Timedelta(days=365.25))

        print('Finished running simulation for year:', year)
        _print_timestamp()

        pop = simulation.get_population()

        # assign age brackets to the individuals
        pop = utils.get_age_bucket(pop)

        # save the output file to csv
        year_output_dir = os.path.join(config.output_dir,
                                       config.location,
                                       'year_' + str(year))
        os.makedirs(year_output_dir, exist_ok=True)

        output_data_filename = ('ssm_' + config.location +
                                '_MSOA11_ppp_2011_simulation_year_' +
                                str(year) + '.csv')
        pop.to_csv(os.path.join(year_output_dir, output_data_filename))

        print()
        print('In year: ', config.time.start.year + year)

        # print some summary stats on the simulation
        print('alive', len(pop[pop['alive'] == 'alive']))

        if 'Mortality()' in config.components:
            print('dead', len(pop[pop['alive'] == 'dead']))
        if 'Emigration()' in config.components:
            print('emigrated', len(pop[pop['alive'] == 'emigrated']))
        if 'InternalMigration()' in config.components:
            # Count only simulants flagged 'Yes'; comparing against '' would
            # also count those flagged 'No'.
            print('internal migration',
                  len(pop[pop['internal_outmigration'] == 'Yes']))
        if 'FertilityAgeSpecificRates()' in config.components:
            print('New children', len(pop[pop['parent_id'] != -1]))
        if 'Immigration()' in config.components:
            print('Immigrants',
                  len(pop[pop['immigrated'].astype(str) == 'Yes']))

    return pop