def test_generate_household_sizes_from_fixed_pop_size(self):
    """
    Exercise generate_household_sizes_from_fixed_pop_size with inputs that
    were hand-picked so every conditional branch of the method is reached.

    Returns:
        None
    """
    def people_in(households):
        # Entry at index k is weighted by k + 1 when adding up the people.
        return sum((k + 1) * households[k] for k in range(len(households)))

    uniform_distr = {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.2, 5: 0.2}

    # The expected value of the uniform distribution is 3.0. 900 is
    # divisible by it while 901 is not, which gives one case for the
    # N_gen = N condition and one for the N_gen < N condition.
    for pop_size in (900, 901):
        counts = sp.generate_household_sizes_from_fixed_pop_size(
            N=pop_size, hh_size_distr=uniform_distr)
        # the total number of people must match the requested N
        self.assertEqual(pop_size, people_in(counts))
        # the generated counts must follow the requested distribution
        self.verify_buckets(uniform_distr.values(), counts)

    # A slightly modified distribution with expected value 2.91, which will
    # round down to 2.9 and create the N_gen > N condition.
    skewed_distr = {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.29, 5: 0.11}
    skewed_counts = sp.generate_household_sizes_from_fixed_pop_size(
        N=900, hh_size_distr=skewed_distr)
    self.assertEqual(900, people_in(skewed_counts))
    self.verify_buckets(skewed_distr.values(), skewed_counts)
def test_generate_all_households(location='seattle_metro', state_location='Washington', country_location='usa'):
    """
    Assemble every input of sp.generate_all_households for N = 1000 and
    check that both returned objects are present.

    Args:
        location: passed to sp.get_household_size_distr.
        state_location: passed to sp.get_household_size_distr and
            sp.get_census_age_brackets_path.
        country_location: passed to the household size, head-age and
            census age bracket loaders.

    Returns:
        None
    """
    N = 1000

    household_size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        N, household_size_distr)

    hha_brackets = sp.get_head_age_brackets(
        datadir, country_location=country_location)
    hha_by_size_counts = sp.get_head_age_by_size_distr(
        datadir, country_location=country_location)

    age_brackets_filepath = sp.get_census_age_brackets_path(
        datadir, state_location, country_location)
    age_brackets = sp.get_age_brackets_from_df(age_brackets_filepath)
    age_by_brackets_dic = sp.get_age_by_brackets_dic(age_brackets)

    contact_matrix_dic = sp.get_contact_matrix_dic(
        datadir, sheet_name='United States of America')

    # Flat distribution over the single-year ages 0..100.
    n_ages = 101
    single_year_age_distr = {age: 1.0 / n_ages for age in range(n_ages)}

    homes_dic, homes = sp.generate_all_households(
        N, hh_sizes, hha_by_size_counts, hha_brackets, age_brackets,
        age_by_brackets_dic, contact_matrix_dic, single_year_age_distr)

    # `assert a, b` would treat b as the failure message rather than as a
    # second condition, so each returned object gets its own assert.
    assert homes_dic, 'generate_all_households returned an empty or None homes_dic'
    assert homes is not None, 'generate_all_households returned None for homes'
def test_generate_household_sizes_from_fixed_pop_size(location='seattle_metro', state_location='Washington', country_location='usa'):
    """
    Load the household size distribution for the given location, generate
    household sizes for a fixed size of 1000 and check that the result has
    exactly 7 entries.

    Args:
        location: passed to sp.get_household_size_distr.
        state_location: passed to sp.get_household_size_distr.
        country_location: passed to sp.get_household_size_distr.

    Returns:
        None
    """
    size_distr = sp.get_household_size_distr(
        datadir, location, state_location, country_location)

    # Handed over as the first positional argument of the generator.
    fixed_size = 1000
    generated_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        fixed_size, size_distr)

    n_entries = len(generated_sizes)
    assert n_entries == 7
def test_custom_household_size_distro_honored(self):
    """
    Check the results of generate_household_sizes_from_fixed_pop_size when
    it is given a customized distribution.

    The most common household size must be the size with the highest
    probability, and verify_portion_honored is used as additional
    validation logic for the portions 0.25, 0.2 and 0.1.

    Returns:
        None
    """
    self.is_debugging = False
    custom_distro = {
        1: 0.25,
        2: 0.075,
        3: 0.10,
        4: 0.30,
        5: 0.05,
        6: 0.05,
        7: 0.175,
    }
    hh_sizes = sp.generate_household_sizes_from_fixed_pop_size(
        500, custom_distro)
    hh_size_list = list(hh_sizes)  # Comes as np.ndarray

    # Position of the largest count; the first one wins on ties.
    most_index = hh_size_list.index(max(hh_size_list))
    # custom_distro is 1 indexed while hh_size_list is 0 indexed
    most_houses_probability = custom_distro[most_index + 1]
    highest_probability = max(custom_distro.values())
    self.assertEqual(
        highest_probability,
        most_houses_probability,
        msg="The most common household size should be the size with the highest probability",
    )

    prob_bucket_list = list(custom_distro.values())
    # Same validation repeated for each portion, in the listed order.
    for portion in (0.25, 0.2, 0.1):
        self.verify_portion_honored(probability_buckets=prob_bucket_list,
                                    count_buckets=hh_size_list,
                                    portion=portion)
print(household_size_distr) Nhomes_to_sample_smooth = 100000 hh_sizes = sp.generate_household_sizes(Nhomes_to_sample_smooth, household_size_distr) totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes) # hh_sizes = sp.generate_household_sizes(Nhomes,household_size_distr) syn_ages, syn_sexes = sp.get_usa_age_sex_n(location, state_location, totalpop) syn_age_count = Counter(syn_ages) syn_age_distr = sp.norm_dic(Counter(syn_ages)) N = Nhomes hh_sizes = sp.generate_household_sizes_from_fixed_pop_size( N, household_size_distr) totalpop = sp.get_totalpopsize_from_household_sizes(hh_sizes) print(totalpop, 'pop') hha_df = sp.get_household_head_age_by_size_df(datadir, country_location, use_bayesian) hha_brackets = sp.get_head_age_brackets(datadir, country_location, use_bayesian) hha_by_size = sp.get_head_age_by_size_distr(datadir, country_location, use_bayesian) homes_dic, homes = sp.generate_all_households(hh_sizes, hha_by_size, hha_brackets, age_brackets, age_by_brackets_dic, contact_matrix_dic,