def test_controlling_age_of_head(self):
    """Control household totals by one marginal characteristic, age_of_head.

    Builds 15000 households spread evenly (5000 each) over three age
    brackets (0-34, 35-64, 65 and above), then runs the
    HouseholdTransitionModel for 2000, 2001 and 2002.  After every run the
    test checks that

    * the overall number of households is within 10% (rtol=1e-1) of the
      sum of that year's control totals, and
    * the number of households in each age bracket matches that year's
      control totals (rtol=1e-6).
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "age_of_head": array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
        "total_number_of_households": array([25013, 21513, 18227,    # 2000
                                             10055, 15003, 17999,    # 2001
                                             15678, 14001, 20432]),  # 2002
    }

    # One row per age bracket.
    # NOTE(review): max == -1 presumably means "no upper bound"; the checks
    # below never read the max of the last bracket -- confirm against the model.
    household_characteristics_for_ht_data = {
        "characteristic": array(3*['age_of_head']),
        "min": array([0, 35, 65]),
        "max": array([34, 64, -1]),
    }

    households_data = {
        "household_id": arange(15000)+1,
        "building_id": array(15000*[1]),
        "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                             2000*[35] + 1000*[40] + 1000*[54] + 1000*[62] +
                             1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
        "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 5000*[5], dtype=int8),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year', 'age_of_head'])

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # self.person_data is defined outside this method.
    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def households_per_category():
        """Count the current households falling into each age bracket."""
        # Re-read the attribute on every call: the model changes hh_set.
        ages = hh_set.get_attribute('age_of_head')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        last = hc_set.size() - 1
        counts = zeros(hc_set.size(), dtype=int32)
        # The first bracket is checked against its upper bound only ...
        counts[0] = (ages <= upper[0]).sum()
        for i in range(1, last):
            counts[i] = logical_and(ages >= lower[i], ages <= upper[i]).sum()
        # ... and the last bracket against its lower bound only.
        counts[last] = (ages >= lower[last]).sum()
        return counts

    def run_and_verify(year, first, stop):
        """Run the model for `year` and compare with control totals [first:stop]."""
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        targets = hct_set.get_attribute("total_number_of_households")[first:stop]

        # Overall number of households (loose tolerance).
        results = hh_set.size()
        should_be = [targets.sum()]
        self.assertTrue(ma.allclose(should_be, results, rtol=1e-1),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

        # Number of households per age bracket (tight tolerance).
        results = households_per_category()
        should_be = targets
        self.assertTrue(ma.allclose(results, should_be, rtol=1e-6),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all three categories
    run_and_verify(2000, 0, 3)

    # this run should remove households in all three categories
    run_and_verify(2001, 3, 6)

    # this run should add and remove households
    run_and_verify(2002, 6, 9)
def test_controlling_income(self):
    """Control household totals by one marginal characteristic, income.

    Builds 20000 households spread evenly (5000 each) over four income
    brackets, then runs the HouseholdTransitionModel for 2000, 2001 and
    2002.  After every run the test checks that

    * the overall number of households is within 10% (rtol=1e-1) of the
      sum of that year's control totals, and
    * the number of households in each income bracket matches that year's
      control totals (rtol=1e-6).

    The rows of the characteristics table are deliberately not sorted by
    income, so the test also exercises row-order invariance.
    """
    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2000,
                       2001, 2001, 2001, 2001,
                       2002, 2002, 2002, 2002]),
        "income": array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]),
        "total_number_of_households": array([25013, 21513, 18227, 18493,    # 2000
                                             10055, 15003, 17999, 17654,    # 2001
                                             15678, 14001, 20432, 14500]),  # 2002
    }

    # Rows 2 and 3 are swapped on purpose (testing row invariance): row 2
    # holds the top bracket (min 120000) and row 3 holds the 70000-119999
    # bracket.
    # NOTE(review): max == -1 presumably means "no upper bound"; the checks
    # below never read the max of the top bracket -- confirm against the model.
    household_characteristics_for_ht_data = {
        "characteristic": array(4*['income']),
        "min": array([0, 40000, 120000, 70000]),
        "max": array([39999, 69999, -1, 119999]),
    }
    # hc_sorted_index[k] is the table row of the k-th bracket in income order.
    hc_sorted_index = array([0, 1, 3, 2])

    households_data = {
        "household_id": arange(20000)+1,
        "building_id": array(19950*[1] + 50*[0]),
        "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] +
                        2000*[45000] + 1000*[50000] + 2000*[67000] +
                        2000*[90000] + 1000*[100005] + 2000*[110003] +
                        1000*[120000] + 1000*[200000] + 2000*[500000] + 1000*[630000]),
        "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 8000*[5], dtype=int8),
    }

    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')

    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year', 'income'])

    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')

    # self.person_data is defined outside this method.
    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def households_per_category():
        """Count the current households per income bracket, in income order."""
        # Re-read the attribute on every call: the model changes hh_set.
        incomes = hh_set.get_attribute('income')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        counts = zeros(hc_set.size(), dtype=int32)
        # The lowest bracket is checked against its upper bound only ...
        counts[0] = (incomes <= upper[hc_sorted_index[0]]).sum()
        for i in range(1, hc_set.size()-1):
            row = hc_sorted_index[i]
            counts[i] = logical_and(incomes >= lower[row], incomes <= upper[row]).sum()
        # ... and the highest bracket against its lower bound only.
        counts[-1] = (incomes >= lower[hc_sorted_index[-1]]).sum()
        return counts

    def run_and_verify(year, first, stop):
        """Run the model for `year` and compare with control totals [first:stop]."""
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)
        targets = hct_set.get_attribute("total_number_of_households")[first:stop]

        # Overall number of households (loose tolerance).
        results = hh_set.size()
        should_be = [targets.sum()]
        self.assertTrue(ma.allclose(should_be, results, rtol=1e-1),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

        # Number of households per income bracket (tight tolerance).
        results = households_per_category()
        should_be = targets
        self.assertTrue(ma.allclose(results, should_be, rtol=1e-6),
                        "Error, should_be: %s, but result: %s" % (should_be, results))

    # this run should add households in all four categories
    # (the year-2000 control totals sum to 83246)
    run_and_verify(2000, 0, 4)

    # this run should remove households in all four categories
    run_and_verify(2001, 4, 8)

    # this run should add and remove households
    # (the table has 12 rows, so the 2002 slice is [8:12])
    run_and_verify(2002, 8, 12)