def test_accounting_attribute(self):
    """Run TransitionModel with 'jobs' as the dataset accounting attribute
    and verify that, for year 2000, the resulting job counts match the
    employment control totals both in aggregate and per sector.
    """
    control_totals_data = {
        "year": array([2000, 2000, 2000, 2001]),
        "sector_id": array([1, 2, 3, 2]),
        "number_of_jobs": array([25013, 1513, 5000, 10055])
    }
    business_data = {
        "business_id": arange(1500) + 1,
        "grid_id": array(1500 * [1]),
        "sector_id": array(500 * [1] + 500 * [2] + 500 * [3]),
        "jobs": array(500 * [10] + 500 * [10] + 500 * [10]),
    }
    storage = StorageFactory().get_storage('dict_storage')

    storage.write_table(table_name='bs_set', table_data=business_data)
    bs_set = BusinessDataset(in_storage=storage, in_table_name='bs_set')

    storage.write_table(table_name='ect_set',
                        table_data=control_totals_data)
    ect_set = ControlTotalDataset(in_storage=storage,
                                  in_table_name='ect_set',
                                  what='', id_name=[])

    model = TransitionModel(bs_set,
                            dataset_accounting_attribute='jobs',
                            control_total_dataset=ect_set)
    model.run(year=2000, target_attribute_name="number_of_jobs",
              reset_dataset_attribute_value={'grid_id': -1})

    # Aggregate check: total jobs should match the sum of the three
    # year-2000 control totals.
    total_jobs = bs_set.get_attribute('jobs').sum()
    expected_total = [(ect_set.get_attribute("number_of_jobs")[0:3]).sum()]
    # NOTE(review): rtol=10 is an extremely loose relative tolerance;
    # possibly atol was intended — kept as-is to preserve behavior.
    self.assertEqual(
        ma.allclose(expected_total, total_jobs, rtol=10), True,
        "Error, should_be: %s, but result: %s" % (expected_total, total_jobs))

    # Per-sector check: jobs summed within each sector should match the
    # corresponding control total.
    cats = 3
    per_sector = zeros(cats, dtype=int32)
    for i in range(cats):
        sector_mask = (bs_set.get_attribute('sector_id') ==
                       ect_set.get_attribute("sector_id")[i])
        per_sector[i] = (bs_set.get_attribute('jobs') * sector_mask).sum()
    expected = ect_set.get_attribute("number_of_jobs")[0:3]
    self.assertEqual(
        ma.allclose(per_sector, expected, rtol=10), True,
        "Error, should_be: %s, but result: %s" % (expected, per_sector))
def run(self, in_storage, out_storage, business_table="business", jobs_table="jobs", control_totals_table=None):
    """Unroll the business table into a jobs table: each business record with
    N jobs produces N individual job records carrying the business's sector,
    geography, building/parcel ids and a per-job sqft value. Optionally trims
    the result to per-(zone, sector) employment control totals, then writes
    the jobs table to ``out_storage``.

    Parameters:
        in_storage: storage holding the business table (and control totals).
        out_storage: storage the finished jobs table is written to.
        business_table: name of the input business table.
        jobs_table: name of the output jobs table.
        control_totals_table: if not None, name of an employment control-total
            table keyed by (zone_id, sector_id); jobs are removed (never added)
            where actual counts exceed the totals.
    """
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establisments table
    business_dataset = BusinessDataset(in_storage=in_storage, in_table_name=business_table)
    business_sizes = business_dataset.get_attribute(self.number_of_jobs_attr).astype("int32")
    sectors = business_dataset.get_attribute("sector_id")
    tazes = business_dataset.get_attribute(self.geography_id_attr).astype("int32")
    # Optional attributes: default to empty arrays (or None for the flags)
    # when the business table does not carry them.
    building_ids = array([], dtype='int32')
    if "building_id" in business_dataset.get_primary_attribute_names():
        building_ids = business_dataset.get_attribute("building_id")
    parcel_ids = array([], dtype='int32')
    if "parcel_id" in business_dataset.get_primary_attribute_names():
        parcel_ids = business_dataset.get_attribute("parcel_id")
    home_based = array([], dtype='int16')
    if "home_based" in business_dataset.get_primary_attribute_names():
        home_based = business_dataset.get_attribute("home_based")
    building_sqft = business_dataset.get_attribute(self.sqft_attr)
    building_sqft[building_sqft <= 0] = 0  # clamp negative sqft to zero
    join_flags = None
    if "join_flag" in business_dataset.get_primary_attribute_names():
        join_flags = business_dataset.get_attribute("join_flag")
    impute_sqft_flag = None
    if "impute_building_sqft_flag" in business_dataset.get_primary_attribute_names():
        impute_sqft_flag = business_dataset.get_attribute("impute_building_sqft_flag")

    # inititalize jobs attributes; one slot per job, defaults of -1
    # (resize of an empty array yields zero-filled output).
    total_size = business_sizes.sum()
    jobs_data = {}
    jobs_data["sector_id"] = resize(array([-1], dtype=sectors.dtype), total_size)
    jobs_data["building_id"] = resize(array([-1], dtype=building_ids.dtype), total_size)
    jobs_data["parcel_id"] = resize(array([-1], dtype=parcel_ids.dtype), total_size)
    jobs_data[self.geography_id_attr] = resize(array([-1], dtype=tazes.dtype), total_size)
    jobs_data["building_type"] = resize(array([-1], dtype=home_based.dtype), total_size)
    jobs_data["sqft"] = resize(array([], dtype=building_sqft.dtype), total_size)
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype), total_size)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(array([], dtype=impute_sqft_flag.dtype), total_size)

    indices = cumsum(business_sizes)
    # iterate over establishments. For each business create the corresponding number of jobs by filling the corresponding part
    # of the arrays
    start_index = 0
    for i in range(business_dataset.size()):
        end_index = indices[i]
        jobs_data["sector_id"][start_index:end_index] = sectors[i]
        if building_ids.size > 0:
            jobs_data["building_id"][start_index:end_index] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][start_index:end_index] = parcel_ids[i]
        jobs_data[self.geography_id_attr][start_index:end_index] = tazes[i]
        if home_based.size > 0:
            jobs_data["building_type"][start_index:end_index] = home_based[i]
        if self.compute_sqft_per_job:
            # 90% of the building sqft divided evenly among this business's jobs
            jobs_data["sqft"][start_index:end_index] = round((building_sqft[i]-building_sqft[i]/10.0)/float(business_sizes[i])) # sqft per employee
        else:
            jobs_data["sqft"][start_index:end_index] = building_sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][start_index:end_index] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][start_index:end_index] = impute_sqft_flag[i]
        start_index = end_index

    jobs_data["job_id"] = arange(total_size)+1
    if self.compute_sqft_per_job:
        # Bound per-job sqft to [minimum_sqft, maximum_sqft]; zero stays zero.
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        jobs_data["sqft"][logical_and(jobs_data["sqft"]>0, jobs_data["sqft"]<self.minimum_sqft)] = self.minimum_sqft

    # correct missing job_building_types
    wmissing_bt = where(jobs_data["building_type"]<=0)[0]
    if wmissing_bt.size > 0:
        jobs_data["building_type"][wmissing_bt] = 2 # assign non-homebased type for now. It can be re-classified in the assign_bldgs_to_jobs... script

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(
        table_name="jobs",
        table_data=jobs_data
        )
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset, out_storage)

    # Match to control totals (only eliminate jobs if control totals are smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(what='employment', id_name=['zone_id', 'sector_id'], in_table_name=control_totals_table, in_storage=in_storage)
        control_totals.load_dataset(attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for i in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[i,:]
            in_sector = job_dataset.get_attribute("sector_id") == sector
            in_zone_in_sector = logical_and(in_sector, job_dataset.get_attribute("zone_id") == zone)
            # Only remove jobs where the actual count exceeds the control total.
            if in_zone_in_sector.sum() <= njobs[i]:
                continue
            to_be_removed = in_zone_in_sector.sum() - njobs[i]
            this_removal = 0
            not_considered = ones(job_dataset.size(), dtype='bool8')
            for unit in ['parcel_id', 'building_id', None]: # first consider jobs without parcel id, then without building_id, then all
                if unit is not None:
                    # Jobs lacking this unit id are eligible in this round;
                    # mark them considered so later rounds skip them.
                    wnunit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(not_considered, logical_and(in_zone_in_sector, wnunit))
                    not_considered[where(wnunit)] = False
                else:
                    eligible = logical_and(not_considered, in_zone_in_sector)
                eligible_sum = eligible.sum()
                if eligible_sum > 0:
                    where_eligible = where(eligible)[0]
                    if eligible_sum <= to_be_removed-this_removal:
                        # Not enough eligible jobs to cover the deficit: take all.
                        draw = arange(eligible_sum)
                    else:
                        draw = sample_noreplace(where_eligible, to_be_removed-this_removal, eligible_sum)
                    remove = concatenate((remove, where_eligible[draw]))
                    this_removal += draw.size
                    if this_removal >= to_be_removed:
                        break
        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table, out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())
def run(self, in_storage, out_storage, business_table="business",
        jobs_table="jobs", control_totals_table=None):
    """Expand each business record into one job record per employee, then
    write the resulting jobs table. If ``control_totals_table`` is given,
    jobs are removed (never added) in each (zone, sector) cell whose actual
    count exceeds the control total.
    """
    logger.log_status("Unrolling %s table." % business_table)
    # get attributes from the establisments table
    businesses = BusinessDataset(in_storage=in_storage,
                                 in_table_name=business_table)
    present = businesses.get_primary_attribute_names()

    def optional(name, default):
        # Fetch an attribute only when the business table carries it.
        if name in present:
            return businesses.get_attribute(name)
        return default

    sizes = businesses.get_attribute(self.number_of_jobs_attr).astype("int32")
    sector_ids = businesses.get_attribute("sector_id")
    geo_ids = businesses.get_attribute(self.geography_id_attr).astype("int32")
    building_ids = optional("building_id", array([], dtype='int32'))
    parcel_ids = optional("parcel_id", array([], dtype='int32'))
    home_based = optional("home_based", array([], dtype='int16'))
    sqft = businesses.get_attribute(self.sqft_attr)
    sqft[sqft <= 0] = 0  # clamp negatives to zero
    join_flags = optional("join_flag", None)
    impute_sqft_flag = optional("impute_building_sqft_flag", None)

    # Initialize one slot per job, defaulting ids to -1.
    n_jobs = sizes.sum()
    jobs_data = {
        "sector_id": resize(array([-1], dtype=sector_ids.dtype), n_jobs),
        "building_id": resize(array([-1], dtype=building_ids.dtype), n_jobs),
        "parcel_id": resize(array([-1], dtype=parcel_ids.dtype), n_jobs),
        self.geography_id_attr: resize(array([-1], dtype=geo_ids.dtype),
                                       n_jobs),
        "building_type": resize(array([-1], dtype=home_based.dtype), n_jobs),
        "sqft": resize(array([], dtype=sqft.dtype), n_jobs),
    }
    if join_flags is not None:
        jobs_data["join_flag"] = resize(array([], dtype=join_flags.dtype),
                                        n_jobs)
    if impute_sqft_flag is not None:
        jobs_data["impute_building_sqft_flag"] = resize(
            array([], dtype=impute_sqft_flag.dtype), n_jobs)

    # Fill each business's slice [first:last) of the job arrays.
    ends = cumsum(sizes)
    first = 0
    for i in range(businesses.size()):
        last = ends[i]
        jobs_data["sector_id"][first:last] = sector_ids[i]
        if building_ids.size > 0:
            jobs_data["building_id"][first:last] = building_ids[i]
        if parcel_ids.size > 0:
            jobs_data["parcel_id"][first:last] = parcel_ids[i]
        jobs_data[self.geography_id_attr][first:last] = geo_ids[i]
        if home_based.size > 0:
            jobs_data["building_type"][first:last] = home_based[i]
        if self.compute_sqft_per_job:
            # 90% of the building sqft split evenly among this business's jobs.
            jobs_data["sqft"][first:last] = round(
                (sqft[i] - sqft[i] / 10.0) / float(sizes[i]))
        else:
            jobs_data["sqft"][first:last] = sqft[i]
        if join_flags is not None:
            jobs_data["join_flag"][first:last] = join_flags[i]
        if impute_sqft_flag is not None:
            jobs_data["impute_building_sqft_flag"][first:last] = \
                impute_sqft_flag[i]
        first = last

    jobs_data["job_id"] = arange(n_jobs) + 1
    if self.compute_sqft_per_job:
        # Bound positive per-job sqft to [minimum_sqft, maximum_sqft].
        jobs_data["sqft"] = clip(jobs_data["sqft"], 0, self.maximum_sqft)
        too_small = logical_and(jobs_data["sqft"] > 0,
                                jobs_data["sqft"] < self.minimum_sqft)
        jobs_data["sqft"][too_small] = self.minimum_sqft

    # correct missing job_building_types: assign non-homebased type (2) for
    # now. It can be re-classified in the assign_bldgs_to_jobs... script.
    missing_bt = where(jobs_data["building_type"] <= 0)[0]
    if missing_bt.size > 0:
        jobs_data["building_type"][missing_bt] = 2

    # create jobs table and write it out
    storage = StorageFactory().get_storage('dict_storage')
    storage.write_table(table_name="jobs", table_data=jobs_data)
    job_dataset = JobDataset(in_storage=storage)
    if self.unplace_jobs_with_non_existing_buildings:
        self.do_unplace_jobs_with_non_existing_buildings(job_dataset,
                                                         out_storage)

    # Match to control totals (only eliminate jobs if control totals are
    # smaller than the actual number of jobs).
    if control_totals_table is not None:
        logger.log_status("Matching to control totals.")
        control_totals = ControlTotalDataset(
            what='employment',
            id_name=['zone_id', 'sector_id'],
            in_table_name=control_totals_table,
            in_storage=in_storage)
        control_totals.load_dataset(
            attributes=['zone_id', 'sector_id', 'jobs'])
        zones_sectors = control_totals.get_id_attribute()
        njobs = control_totals.get_attribute('jobs')
        remove = array([], dtype='int32')
        for row in range(zones_sectors.shape[0]):
            zone, sector = zones_sectors[row, :]
            in_cell = logical_and(
                job_dataset.get_attribute("sector_id") == sector,
                job_dataset.get_attribute("zone_id") == zone)
            if in_cell.sum() <= njobs[row]:
                continue
            surplus = in_cell.sum() - njobs[row]
            removed_so_far = 0
            not_considered = ones(job_dataset.size(), dtype='bool8')
            # Removal preference: jobs without a parcel id first, then jobs
            # without a building id, finally any job in the cell.
            for unit in ['parcel_id', 'building_id', None]:
                if unit is None:
                    eligible = logical_and(not_considered, in_cell)
                else:
                    no_unit = job_dataset.get_attribute(unit) <= 0
                    eligible = logical_and(
                        not_considered, logical_and(in_cell, no_unit))
                    not_considered[where(no_unit)] = False
                n_eligible = eligible.sum()
                if n_eligible > 0:
                    where_eligible = where(eligible)[0]
                    still_needed = surplus - removed_so_far
                    if n_eligible <= still_needed:
                        # Fewer eligible jobs than needed: take them all.
                        draw = arange(n_eligible)
                    else:
                        draw = sample_noreplace(where_eligible, still_needed,
                                                n_eligible)
                    remove = concatenate((remove, where_eligible[draw]))
                    removed_so_far += draw.size
                    if removed_so_far >= surplus:
                        break
        job_dataset.remove_elements(remove)
        logger.log_status("%s jobs removed." % remove.size)

    logger.log_status("Write jobs table.")
    job_dataset.write_dataset(out_table_name=jobs_table,
                              out_storage=out_storage)
    logger.log_status("Created %s jobs." % job_dataset.size())