def test_accounting_attribute(self):
    """Run TransitionModel with 'jobs' as the accounting attribute.

    1500 businesses (500 per sector, 10 jobs each) are run through the model
    for year 2000. The summed 'jobs' attribute, overall and per sector, is
    then compared with the year-2000 rows of the control totals.
    """
    failure_template = "Error, should_be: %s, but result: %s"

    control_totals_table = {
        "year": array([2000, 2000, 2000, 2001]),
        "sector_id": array([1, 2, 3, 2]),
        "number_of_jobs": array([25013, 1513, 5000, 10055]),
    }
    businesses_table = {
        "business_id": arange(1500) + 1,
        "grid_id": array(1500 * [1]),
        "sector_id": array(500 * [1] + 500 * [2] + 500 * [3]),
        "jobs": array(1500 * [10]),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='bs_set', table_data=businesses_table)
    bs_set = BusinessDataset(in_storage=storage, in_table_name='bs_set')
    storage.write_table(table_name='ect_set', table_data=control_totals_table)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set',
                                  what='', id_name=[])

    model = TransitionModel(bs_set, dataset_accounting_attribute='jobs',
                            control_total_dataset=ect_set)
    model.run(year=2000, target_attribute_name="number_of_jobs",
              reset_dataset_attribute_value={'grid_id': -1})

    # Overall number of jobs against the sum of the three year-2000 targets.
    # NOTE(review): rtol=10 tolerates a deviation of ten times the reference
    # value, so this check passes for almost any result - confirm whether an
    # absolute tolerance (atol) was intended.
    actual_total = bs_set.get_attribute('jobs').sum()
    expected_total = [ect_set.get_attribute("number_of_jobs")[0:3].sum()]
    self.assertEqual(ma.allclose(expected_total, actual_total, rtol=10), True,
                     failure_template % (expected_total, actual_total))

    # Jobs per sector, in the order of the first three control-total rows.
    cats = 3
    jobs_per_sector = zeros(cats, dtype=int32)
    for row in range(cats):
        sector = ect_set.get_attribute("sector_id")[row]
        in_sector = bs_set.get_attribute('sector_id') == sector
        jobs_per_sector[row] = bs_set.get_attribute('jobs')[in_sector].sum()
    expected_per_sector = ect_set.get_attribute("number_of_jobs")[0:3]
    self.assertEqual(ma.allclose(jobs_per_sector, expected_per_sector, rtol=10), True,
                     failure_template % (expected_per_sector, jobs_per_sector))
def test_controlling_age_of_head(self):
    """Control for a single marginal characteristic, namely age_of_head.

    HouseholdTransitionModel is run for 2000, 2001 and 2002 on the same
    household set. After every run the total number of households and the
    number of households in each of the three age brackets are compared
    with the control totals of that year.
    """
    failure_template = "Error, should_be: %s, but result: %s"

    control_totals_table = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "age_of_head": array([0, 1, 2, 0, 1, 2, 0, 1, 2]),
        "total_number_of_households": array([25013, 21513, 18227,   # 2000
                                             10055, 15003, 17999,   # 2001
                                             15678, 14001, 20432]), # 2002
    }
    # A max of -1 marks the open-ended top bracket.
    brackets_table = {
        "characteristic": array(3 * ['age_of_head']),
        "min": array([0, 35, 65]),
        "max": array([34, 64, -1]),
    }
    households_table = {
        "household_id": arange(15000) + 1,
        "building_id": array(15000 * [1]),
        "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                             2000*[35] + 1000*[40] + 1000*[54] + 1000*[62] +
                             1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
        "persons": array(1000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 5000*[5], dtype=int8),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='hh_set', table_data=households_table)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')
    storage.write_table(table_name='hct_set', table_data=control_totals_table)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year', 'age_of_head'])
    storage.write_table(table_name='hc_set', table_data=brackets_table)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')
    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def households_per_bracket():
        """Count the current households falling into each age bracket."""
        ages = hh_set.get_attribute('age_of_head')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        top = hc_set.size() - 1
        counts = zeros(hc_set.size(), dtype=int32)
        # Lowest bracket: only bounded from above.
        counts[0] = (ages <= upper[0]).sum()
        for k in range(1, top):
            counts[k] = logical_and(ages >= lower[k], ages <= upper[k]).sum()
        # Highest bracket: only bounded from below.
        counts[top] = (ages >= lower[top]).sum()
        return counts

    # (year, first control-total row, one-past-last control-total row).
    # 2000 adds households in all three brackets, 2001 removes households in
    # all three brackets, 2002 both adds and removes.
    for year, first, stop in ((2000, 0, 3), (2001, 3, 6), (2002, 6, 9)):
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)

        actual_total = hh_set.size()
        expected_total = [hct_set.get_attribute("total_number_of_households")[first:stop].sum()]
        self.assertEqual(ma.allclose(expected_total, actual_total, rtol=1e-1), True,
                         failure_template % (expected_total, actual_total))

        actual_counts = households_per_bracket()
        expected_counts = hct_set.get_attribute("total_number_of_households")[first:stop]
        self.assertEqual(ma.allclose(actual_counts, expected_counts, rtol=1e-6), True,
                         failure_template % (expected_counts, actual_counts))
def test_controlling_income(self):
    """Control for a single marginal characteristic, namely income.

    HouseholdTransitionModel is run for 2000, 2001 and 2002 on the same
    household set. After every run the total number of households and the
    number of households in each of the four income brackets are compared
    with the control totals of that year. The rows of the characteristics
    table are deliberately stored out of order (testing row invariance).
    """
    failure_template = "Error, should_be: %s, but result: %s"

    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
        "income": array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]),
        "total_number_of_households": array([25013, 21513, 18227, 18493,   # 2000
                                             10055, 15003, 17999, 17654,   # 2001
                                             15678, 14001, 20432, 14500]), # 2002
    }
    household_characteristics_for_ht_data = {
        "characteristic": array(4 * ['income']),
        # category 120000 has index 3 and category 70000 has index 2
        "min": array([0, 40000, 120000, 70000]),
        # (testing row invariance); -1 marks the open-ended top bracket
        "max": array([39999, 69999, -1, 119999]),
    }
    # Row of the characteristics table holding the k-th bracket in ascending order.
    hc_sorted_index = array([0, 1, 3, 2])

    households_data = {
        "household_id": arange(20000) + 1,
        "building_id": array(19950 * [1] + 50 * [0]),
        "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] +
                        2000*[45000] + 1000*[50000] + 2000*[67000] + 2000*[90000] +
                        1000*[100005] + 2000*[110003] + 1000*[120000] + 1000*[200000] +
                        2000*[500000] + 1000*[630000]),
        "persons": array(3000*[2] + 2000*[3] + 1000*[1] + 1000*[6] + 1000*[1] +
                         1000*[4] + 3000*[1] + 8000*[5], dtype=int8),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')
    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=['year', 'income'])
    storage.write_table(table_name='hc_set', table_data=household_characteristics_for_ht_data)
    hc_set = HouseholdCharacteristicDataset(in_storage=storage, in_table_name='hc_set')
    storage.write_table(table_name='prs_set', table_data=self.person_data)
    prs_set = PersonDataset(in_storage=storage, in_table_name='prs_set')

    model = HouseholdTransitionModel(debuglevel=3)

    def households_per_bracket():
        """Count the current households in each income bracket, ascending."""
        incomes = hh_set.get_attribute('income')
        lower = hc_set.get_attribute("min")
        upper = hc_set.get_attribute("max")
        counts = zeros(hc_set.size(), dtype=int32)
        # Lowest bracket: only bounded from above.
        counts[0] = (incomes <= upper[hc_sorted_index[0]]).sum()
        for k in range(1, hc_set.size() - 1):
            row = hc_sorted_index[k]
            counts[k] = logical_and(incomes >= lower[row], incomes <= upper[row]).sum()
        # Highest bracket: only bounded from below.
        counts[-1] = (incomes >= lower[hc_sorted_index[-1]]).sum()
        return counts

    # (year, first control-total row, one-past-last control-total row).
    # 2000 adds households in all four categories, 2001 removes households in
    # all four categories, 2002 both adds and removes.
    # The 2002 range ends at 12, the length of the table (it used to be
    # written as 13, which only worked because slicing clamps the bound).
    for year, first, stop in ((2000, 0, 4), (2001, 4, 8), (2002, 8, 12)):
        model.run(year=year, person_set=prs_set, household_set=hh_set,
                  control_totals=hct_set, characteristics=hc_set)

        # Expected total is derived from the table for every year
        # (for 2000 this is 25013 + 21513 + 18227 + 18493 = 83246).
        results = hh_set.size()
        should_be = [hct_set.get_attribute("total_number_of_households")[first:stop].sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        results = households_per_bracket()
        should_be = hct_set.get_attribute("total_number_of_households")[first:stop]
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         failure_template % (should_be, results))
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
    """Unroll a business (establishment) table into one record per job.

    Every business contributes as many job records as its
    ``self.number_of_jobs_attr`` value; each job inherits the business'
    sector, geography id, optional building/parcel ids and flags, and a
    square-footage value. Optionally the jobs are thinned so that no
    (zone, sector) combination exceeds its control total.

    Arguments:
        in_storage -- storage from which the business table and, if given,
            the control totals table are read.
        out_storage -- storage the resulting jobs table is written to; also
            handed to do_unplace_jobs_with_non_existing_buildings().
        business_table -- name of the input business table.
        jobs_table -- name of the output jobs table.
        control_totals_table -- name of a table with attributes 'zone_id',
            'sector_id' and 'jobs'. If None, no matching to control totals
            is done. Jobs are only removed, never added.
    """
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establishments table
    business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
    # Optional attributes stay empty arrays (or None) when the table lacks them.
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")
    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    building_sqft[building_sqft <= 0] = 0
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")

    # initialize jobs attributes
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
    jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
    jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)
    indices = cumsum(business_sizes)

    # iterate over establishments. For each business create the corresponding
    # number of jobs by filling the corresponding part of the arrays
    start_index = 0
    for i in range(business_dataset.size()):
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:
            # A business without jobs owns an empty slice, so there is nothing
            # to fill; skipping it avoids dividing by zero.
            if business_sizes[i] > 0:
                jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size)+1
    if self.compute_sqft_per_job:
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        # Positive values below the minimum are raised to the minimum; zeros stay zero.
        jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"]<=0)[0]
    if wmissing_bt.size > 0:
        jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name="jobs",
        table_data=jobs_data
        )
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)

    # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'],
                                             in_table_name=control_totals_table, in_storage=in_storage)
        control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i,:]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
            n_in_zone_in_sector = in_zone_in_sector.sum()
            if n_in_zone_in_sector <= njobs[i]:
                continue
            to_be_removed = n_in_zone_in_sector - njobs[i]
            this_removal = 0
            # 'bool' instead of the 'bool8' alias, which recent NumPy releases no longer provide.
            not_considered = ones(job_dataset.size(), dtype='bool')
            for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all
                if unit is not None:
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                    # Jobs looked at in this pass must not be drawn again in a later pass.
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered, in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    if eligible_sum <= to_be_removed-this_removal:
                        # Not more candidates than still needed: take all of them.
                        draw = arange(eligible_sum)
                    else:
                        # NOTE(review): the third positional argument is passed as in the
                        # original code; 'draw' is then used as positions into
                        # where_eligible - confirm against sample_noreplace's signature.
                        draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break

        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())
def run(self):
    """Runs the test model.

    Computes zone-level population variables and, from them, unscaled
    government and education job estimates. Both estimates are rescaled so
    that their sums equal the current year's control totals and are stored
    on the zone dataset as the primary attributes 'gov_jobs' and 'edu_jobs'.
    """
    zone_set = SessionConfiguration().get_dataset_pool().get_dataset('zone')

    # Intermediate variables: only their registration on the zone dataset is
    # needed, the returned values are not used. The order matters because
    # later expressions refer to the aliases defined by earlier ones.
    intermediate_expressions = (
        '_zone_pop = zone.aggregate(household.persons,intermediates=[building,parcel])',
        '_county_pop = zone.aggregate(parcel.disaggregate(county.aggregate(household.persons,intermediates=[building,parcel])),function=median)',
        '_regional_pop = zone.disaggregate(alldata.aggregate_all(household.persons))',
        '_local_gov_jobs = zone._zone_pop * zone.disaggregate(zone_gov_ed_job.local_gov)',
        '_ed_k12 = zone._zone_pop * zone.disaggregate(zone_gov_ed_job.ed_k12)',
        '_county_gov_jobs = zone._county_pop * zone.disaggregate(zone_gov_ed_job.county_gov)',
        '_state_gov_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.state_gov)',
        '_fed_gov_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.fed_gov)',
        '_ed_high_jobs = zone._regional_pop * zone.disaggregate(zone_gov_ed_job.ed_high)',
    )
    for expression in intermediate_expressions:
        zone_set.compute_variables(expression)

    gov_jobs = zone_set.compute_variables('_gov_jobs = _local_gov_jobs + _county_gov_jobs + _state_gov_jobs + _fed_gov_jobs')
    edu_jobs = zone_set.compute_variables('_ed_jobs = _ed_k12 + _ed_high_jobs')

    current_year = SimulationState().get_current_time()
    base_year = '2010'
    base_cache_storage = AttributeCache().get_flt_storage_for_year(base_year)
    control_totals = ControlTotalDataset(in_storage=base_cache_storage,
                                         in_table_name="annual_business_control_totals")
    number_of_jobs = control_totals.get_attribute("total_number_of_jobs")

    def current_jobs_in_sector(sector_code):
        """Sum the current year's control-total jobs of one sector."""
        rows = where(logical_and(control_totals.get_attribute("year") == current_year,
                                 control_totals.get_attribute("sector_id") == sector_code))[0]
        return number_of_jobs[rows].sum()

    jobs_current_edother = current_jobs_in_sector(618320)
    jobs_current_edhigh = current_jobs_in_sector(618330)
    jobs_current_edk12 = current_jobs_in_sector(618340)
    total_gov_jobs = current_jobs_in_sector(618319)
    total_edu_jobs = jobs_current_edother + jobs_current_edhigh + jobs_current_edk12

    # Rescale the zone estimates so that they add up to the control totals.
    gov_scaling_ratio = total_gov_jobs * 1.0 / gov_jobs.sum()
    edu_scaling_ratio = total_edu_jobs * 1.0 / edu_jobs.sum()
    zone_set.add_primary_attribute(name='gov_jobs', data=around(gov_jobs * gov_scaling_ratio))
    zone_set.add_primary_attribute(name='edu_jobs', data=around(edu_jobs * edu_scaling_ratio))
def test_controlling_sector(self):
    """Control for a single marginal characteristic, namely sector_id.

    TransitionModel is run for 2000, 2001 and 2002 on the same job set.
    After every run the total number of jobs and the number of jobs in each
    of the three sectors are compared with the control totals of that year.
    """
    failure_template = "Error, should_be: %s, but result: %s"

    control_totals_table = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "sector_id": array([1, 2, 3, 1, 2, 3, 1, 2, 3]),
        "number_of_jobs": array([25013, 21513, 18227,   # 2000
                                 10055, 15003, 17999,   # 2001
                                 15678, 14001, 20432]), # 2002
    }
    jobs_table = {
        "job_id": arange(15000) + 1,
        "grid_id": array(15000 * [1]),
        "sector_id": array(5000 * [1] + 5000 * [2] + 5000 * [3]),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='job_set', table_data=jobs_table)
    job_set = JobDataset(in_storage=storage, in_table_name='job_set')
    storage.write_table(table_name='ect_set', table_data=control_totals_table)
    ect_set = ControlTotalDataset(in_storage=storage, in_table_name='ect_set',
                                  what='', id_name=[])

    model = TransitionModel(job_set, control_total_dataset=ect_set)

    cats = 3
    # (year, index of the year's first control-total row).
    # 2000 adds jobs in all three sectors, 2001 removes jobs in all three
    # sectors, 2002 both adds and removes.
    for year, offset in ((2000, 0), (2001, 3), (2002, 6)):
        model.run(year=year, target_attribute_name="number_of_jobs",
                  reset_dataset_attribute_value={'grid_id': -1})

        actual_total = job_set.size()
        expected_total = [ect_set.get_attribute("number_of_jobs")[offset:offset + cats].sum()]
        self.assertEqual(ma.allclose(expected_total, actual_total, rtol=1e-1), True,
                         failure_template % (expected_total, actual_total))

        jobs_per_sector = zeros(cats, dtype=int32)
        for k in range(cats):
            sector = ect_set.get_attribute("sector_id")[k + offset]
            jobs_per_sector[k] = (job_set.get_attribute('sector_id') == sector).sum()
        expected_per_sector = ect_set.get_attribute("number_of_jobs")[offset:offset + cats]
        self.assertEqual(ma.allclose(jobs_per_sector, expected_per_sector, rtol=1e-6), True,
                         failure_template % (expected_per_sector, jobs_per_sector))
def test_controlling_age_of_head(self):
    """Control for a single marginal characteristic, namely age_of_head.

    TransitionModel is run for 2000, 2001 and 2002 on the same household
    set. The age brackets are given directly in the control totals table
    through 'age_of_head_min' / 'age_of_head_max' (a max of -1 marks the
    open-ended top bracket). After every run the total number of households
    and the number of households per bracket are compared with the control
    totals of that year.
    """
    failure_template = "Error, should_be: %s, but result: %s"

    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2001, 2001, 2001, 2002, 2002, 2002]),
        "age_of_head_min": array([ 0, 35, 65,  0, 35, 65,  0, 35, 65]),
        "age_of_head_max": array([34, 64, -1, 34, 64, -1, 34, 64, -1]),
        "total_number_of_households": array([25013, 21513, 18227,   # 2000
                                             10055, 15003, 17999,   # 2001
                                             15678, 14001, 20432]), # 2002
    }
    households_data = {
        "household_id": arange(15000) + 1,
        "grid_id": array(15000 * [1]),
        "age_of_head": array(1000*[25] + 1000*[28] + 2000*[32] + 1000*[34] +
                             2000*[35] + 1000*[40] + 1000*[54] + 1000*[62] +
                             1000*[65] + 1000*[68] + 2000*[71] + 1000*[98]),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')
    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=[])

    model = TransitionModel(hh_set, control_total_dataset=hct_set)

    cats = 3
    # (year, index of the year's first control-total row).
    # 2000 adds households in all three brackets, 2001 removes households in
    # all three brackets, 2002 both adds and removes.
    for year, offset in ((2000, 0), (2001, 3), (2002, 6)):
        model.run(year=year, target_attribute_name="total_number_of_households",
                  reset_dataset_attribute_value={'grid_id': -1})

        results = hh_set.size()
        should_be = [hct_set.get_attribute("total_number_of_households")[offset:offset + cats].sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        # Bracket bounds are always read from the rows of the year under
        # test, so the check stays valid if the brackets ever differ by year.
        ages = hh_set.get_attribute('age_of_head')
        lower = hct_set.get_attribute("age_of_head_min")
        upper = hct_set.get_attribute("age_of_head_max")
        last_row = offset + cats - 1
        results = zeros(cats, dtype=int32)
        # Lowest bracket: only bounded from above.
        results[0] = (ages <= upper[offset]).sum()
        for k in range(1, cats - 1):
            results[k] = logical_and(ages >= lower[offset + k], ages <= upper[offset + k]).sum()
        # Highest bracket: only bounded from below.
        results[-1] = (ages >= lower[last_row]).sum()
        should_be = hct_set.get_attribute("total_number_of_households")[offset:offset + cats]
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         failure_template % (should_be, results))
def test_controlling_income(self):
    """Control for a single marginal characteristic, namely income.

    TransitionModel is run for 2000, 2001 and 2002 on the same household
    set. The income brackets are given directly in the control totals table
    through 'income_min' / 'income_max' (a max of -1 marks the open-ended
    top bracket). After every run the total number of households and the
    number of households per bracket are compared with the control totals
    of that year.
    """
    failure_template = "Error, should_be: %s, but result: %s"

    annual_household_control_totals_data = {
        "year": array([2000, 2000, 2000, 2000, 2001, 2001, 2001, 2001, 2002, 2002, 2002, 2002]),
        "income_min": array([    0, 40000,  70000, 120000,
                                 0, 40000,  70000, 120000,
                                 0, 40000,  70000, 120000]),
        "income_max": array([39999, 69999, 119999,     -1,
                             39999, 69999, 119999,     -1,
                             39999, 69999, 119999,     -1]),
        "total_number_of_households": array([25013, 21513, 18227, 18493,   # 2000
                                             10055, 15003, 17999, 17654,   # 2001
                                             15678, 14001, 20432, 14500]), # 2002
    }
    households_data = {
        "household_id": arange(20000) + 1,
        "grid_id": array(19950 * [1] + 50 * [0]),
        "income": array(1000*[1000] + 1000*[10000] + 2000*[20000] + 1000*[35000] +
                        2000*[45000] + 1000*[50000] + 2000*[67000] + 2000*[90000] +
                        1000*[100005] + 2000*[110003] + 1000*[120000] + 1000*[200000] +
                        2000*[500000] + 1000*[630000]),
    }

    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name='hh_set', table_data=households_data)
    hh_set = HouseholdDataset(in_storage=storage, in_table_name='hh_set')
    storage.write_table(table_name='hct_set', table_data=annual_household_control_totals_data)
    hct_set = ControlTotalDataset(in_storage=storage, in_table_name='hct_set',
                                  what='household', id_name=[])

    model = TransitionModel(hh_set, control_total_dataset=hct_set)

    cats = 4
    # (year, index of the year's first control-total row).
    # 2000 adds households in all four categories, 2001 removes households in
    # all four categories, 2002 both adds and removes.
    for year, offset in ((2000, 0), (2001, 4), (2002, 8)):
        model.run(year=year, target_attribute_name="total_number_of_households",
                  reset_dataset_attribute_value={'grid_id': -1})

        # Expected total is derived from the table for every year
        # (for 2000 this is 25013 + 21513 + 18227 + 18493 = 83246).
        results = hh_set.size()
        should_be = [hct_set.get_attribute("total_number_of_households")[offset:offset + cats].sum()]
        self.assertEqual(ma.allclose(should_be, results, rtol=1e-1), True,
                         failure_template % (should_be, results))

        incomes = hh_set.get_attribute('income')
        lower = hct_set.get_attribute("income_min")
        upper = hct_set.get_attribute("income_max")
        last_row = offset + cats - 1
        results = zeros(cats, dtype=int32)
        # Lowest bracket: only bounded from above.
        results[0] = (incomes <= upper[offset]).sum()
        for k in range(1, cats - 1):
            results[k] = logical_and(incomes >= lower[offset + k], incomes <= upper[offset + k]).sum()
        # Highest bracket: only bounded from below; its row is addressed
        # explicitly instead of through the loop variable left over above.
        results[-1] = (incomes >= lower[last_row]).sum()
        should_be = hct_set.get_attribute("total_number_of_households")[offset:offset + cats]
        self.assertEqual(ma.allclose(results, should_be, rtol=1e-6), True,
                         failure_template % (should_be, results))
def run(self,
        in_storage,
        out_storage,
        business_table="business",
        jobs_table="jobs",
        control_totals_table=None):
    """Unroll a business table into a jobs table with one record per job.

    Each business record is expanded into as many job records as given by
    the attribute ``self.number_of_jobs_attr``.  The resulting jobs are
    optionally trimmed to match control totals and are then written to
    ``out_storage``.

    Arguments:
    in_storage -- storage the business table (and, if requested, the
        control totals table) is read from.
    out_storage -- storage the jobs table is written to.  It is also passed
        to ``do_unplace_jobs_with_non_existing_buildings`` when
        ``self.unplace_jobs_with_non_existing_buildings`` is set.
    business_table -- name of the input business table.
    jobs_table -- name of the output jobs table.
    control_totals_table -- optional name of a table with attributes
        'zone_id', 'sector_id' and 'jobs'.  If given, jobs are removed from
        every (zone_id, sector_id) group that has more jobs than the
        corresponding 'jobs' value.  Jobs are never added.

    Besides the attributes named above, the method reads
    ``self.geography_id_attr``, ``self.sqft_attr``,
    ``self.compute_sqft_per_job``, ``self.minimum_sqft`` and
    ``self.maximum_sqft``.
    """
    logger.log_status("Unrolling %s table." % business_table)

    # get attributes from the establishments table
    business_dataset = BusinessDataset(in_storage=in_storage,
                                       in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(
        self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(
        self.geography_id_attr).astype("int32")

    # Optional attributes: an empty array marks "not available"; its dtype
    # is still used for the corresponding jobs column below.
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")

    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    # negative square footage is treated as zero
    building_sqft[building_sqft <= 0] = 0

    # Optional flags: None marks "not available"; the column is then not
    # created in the jobs table at all.
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute(
            "impute_building_sqft_flag")

    # initialize jobs attributes (ids default to -1)
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype),
                                    total_size)
    jobs_data["building_id"] = resize(
        array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype),
                                    total_size)
    jobs_data[self.geography_id_attr] = resize(
        array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(
        array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype),
                               total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                        total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(
            array([], dtype=impute_sqft_flag.dtype), total_size)

    # indices[i] is the position one past the last job of business i
    indices = cumsum(business_sizes)

    # iterate over establishments. For each business create the
    # corresponding number of jobs by filling the corresponding part of
    # the arrays
    start_index = 0
    for i in range(business_dataset.size()):
        if business_sizes[i] == 0:
            # A business without jobs owns an empty slice (its end index
            # equals start_index), so there is nothing to fill.  Skipping
            # it also avoids dividing by zero in the sqft-per-job
            # computation below.
            continue
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:
            # sqft per employee: 90% of the business sqft split evenly
            # among its jobs
            jobs_data["sqft"][start_index:end_index] = round(
                (building_sqft[i] - building_sqft[i] / 10.0) /
                float(business_sizes[i]))
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][
                start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size) + 1

    if self.compute_sqft_per_job:
        # keep sqft within [0, maximum_sqft] and raise positive values
        # below minimum_sqft to minimum_sqft
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        jobs_data["sqft"][logical_and(
            jobs_data["sqft"] > 0,
            jobs_data["sqft"] < self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"] <= 0)[0]
    if wmissing_bt.size > 0:
        # assign non-homebased type for now. It can be re-classified in the
        # assign_bldgs_to_jobs... script
        jobs_data["building_type"][wmissing_bt] = 2

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="jobs", table_data=jobs_data)
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(
            job_dataset, out_storage)

    # Match to control totals (only eliminate jobs if control totals are
    # smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(
            what='employment',
            id_name=['zone_id', 'sector_id'],
            in_table_name=control_totals_table,
            in_storage=in_storage)
        control_totals.load_dataset(
            attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i, :]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(
                in_sector, job_dataset.get_attribute("zone_id") == zone)
            n_in_group = in_zone_in_sector.sum()
            if n_in_group <= njobs[i]:
                continue
            to_be_removed = n_in_group - njobs[i]
            this_removal = 0
            # 'bool' instead of the 'bool8' alias, which newer NumPy
            # releases no longer provide
            not_considered = ones(job_dataset.size(), dtype='bool')
            # first consider jobs without parcel id, then without
            # building_id, then all
            for unit in ['parcel_id', 'building_id', None]:
                if unit is not None:
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(
                        not_considered,
                        logical_and(in_zone_in_sector, wnunit))
                    # jobs examined in this pass are excluded from the
                    # following passes
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered,
                                           in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    still_needed = to_be_removed - this_removal
                    if eligible_sum <= still_needed:
                        # not more candidates than needed: take them all
                        draw = arange(eligible_sum)
                    else:
                        # NOTE(review): the result is used to index
                        # where_eligible, so the truthy third argument
                        # presumably requests positions rather than
                        # values -- confirm against sample_noreplace.
                        draw = sample_noreplace(where_eligible,
                                                still_needed,
                                                eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break
        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table,
                              out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())