def households(store):
    """Household table restricted to placed households, tagged with base LUZ.

    Drops records without a building assignment (building_id <= 0), attaches
    each household's base-year LUZ via its building's parcel, and adds a
    constant segmentation column.
    """
    hh = store['households']
    # Revisit the allocation and remove GQ from synthetic population?
    hh = hh[hh.building_id > 0]
    parcels = store['parcels']
    bldgs = store['buildings']
    bldgs['luz'] = misc.reindex(parcels.luz_id, bldgs.parcel_id)
    hh['base_luz'] = misc.reindex(bldgs.luz, hh.building_id)
    hh['segmentation_col'] = 1
    return hh
def luz_base_indicators(store):
    """Base-year household and employment counts per LUZ."""
    hh = store['households'][['building_id']]
    emp = store['jobs'][['building_id']]
    bldgs = store['buildings'][['parcel_id']]
    pcl = store['parcels'][['luz_id']]
    # walk each agent up to its LUZ: parcel -> building -> agent
    bldgs['luz_id'] = misc.reindex(pcl.luz_id, bldgs.parcel_id)
    hh['luz_id'] = misc.reindex(bldgs.luz_id, hh.building_id)
    emp['luz_id'] = misc.reindex(bldgs.luz_id, emp.building_id)
    return pd.DataFrame({
        'hh_base': hh.groupby('luz_id').size(),
        'emp_base': emp.groupby('luz_id').size(),
    })
def parcel_average_price(use):
    """Average price for `use` on parcels.

    Residential uses the 85th-percentile residential price per sqft by LUZ,
    clipped to [150, 1250]; other uses come from the network (nodes) table.
    """
    if use == "residential":
        buildings = orca.get_table('buildings')
        res_prices = buildings.res_price_per_sqft[
            buildings.general_type == "Residential"]
        luz_quantile = res_prices.groupby(buildings.luz_id).quantile(.85)
        return misc.reindex(
            luz_quantile, orca.get_table('parcels').luz_id).clip(150, 1250)
    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def scheduled_development_events(buildings, development_projects,
                                 demolish_events, summary, year, parcels,
                                 settings, years_per_iter, parcels_geography,
                                 building_sqft_per_job, vmt_fee_categories):
    """Execute this iteration's scheduled demolition and construction events.

    Demolishes buildings from `demolish_events` whose year_built falls in
    [year, year + years_per_iter), unplacing their households and jobs, then
    builds the development projects scheduled for the same window and records
    them in the simulation summary.
    """
    # first demolish
    demolish = demolish_events.to_frame().\
        query("%d <= year_built < %d" % (year, year + years_per_iter))
    # print() with a single argument prints identically under Python 2 and
    # keeps this module Python 3 compatible (the old `print expr` statements
    # are a syntax error on py3)
    print("Demolishing/building %d buildings" % len(demolish))
    l1 = len(buildings)
    buildings = utils._remove_developed_buildings(
        buildings.to_frame(buildings.local_columns),
        demolish,
        unplace_agents=["households", "jobs"])
    orca.add_table("buildings", buildings)
    buildings = orca.get_table("buildings")
    print("Demolished %d buildings" % (l1 - len(buildings)))
    print(" (this number is smaller when parcel has no existing buildings)")

    # then build
    dps = development_projects.to_frame().\
        query("%d <= year_built < %d" % (year, year + years_per_iter))

    if len(dps) == 0:
        return

    new_buildings = utils.scheduled_development_events(
        buildings, dps,
        remove_developed_buildings=False,
        unplace_agents=['households', 'jobs'])
    # derive the development "form" from the building type
    new_buildings["form"] = new_buildings.building_type.map(
        settings['building_type_map']).str.lower()
    new_buildings["job_spaces"] = new_buildings.non_residential_sqft / \
        new_buildings.building_type.fillna("OF").map(building_sqft_per_job)
    new_buildings["job_spaces"] = new_buildings.job_spaces.\
        fillna(0).astype('int')
    new_buildings["geom_id"] = parcel_id_to_geom_id(new_buildings.parcel_id)
    new_buildings["SDEM"] = True
    new_buildings["subsidized"] = False

    # zone_id is only needed transiently to look up the VMT fee category
    new_buildings["zone_id"] = misc.reindex(
        parcels.zone_id, new_buildings.parcel_id)
    new_buildings["vmt_res_cat"] = misc.reindex(
        vmt_fee_categories.res_cat, new_buildings.zone_id)
    del new_buildings["zone_id"]
    new_buildings["pda"] = parcels_geography.pda_id.loc[
        new_buildings.parcel_id].values

    summary.add_parcel_output(new_buildings)
def write_parcel_output(self, add_xy=None):
    """
    Write the parcel-level output to a csv file

    Parameters
    ----------
    add_xy : dictionary (optional)
        Used to add x, y values to the output - an example dictionary is
        pasted below - the parameters should be fairly self explanatory.
        Note that from_epsg and to_epsg can be omitted in which case the
        coordinate system is not changed.  NOTE: pyproj is required if
        changing coordinate systems::

            {
                "xy_table": "parcels",
                "foreign_key": "parcel_id",
                "x_col": "x",
                "y_col": "y",
                "from_epsg": 3740,
                "to_epsg": 4326
            }

    Returns
    -------
    Nothing
    """
    # nothing accumulated this run - skip writing entirely
    if self.parcel_output is None:
        return

    po = self.parcel_output
    if add_xy is not None:
        x_name, y_name = add_xy["x_col"], add_xy["y_col"]
        xy_joinname = add_xy["foreign_key"]
        xy_df = orca.get_table(add_xy["xy_table"])
        # NOTE: these writes mutate self.parcel_output in place
        po[x_name] = misc.reindex(xy_df[x_name], po[xy_joinname])
        po[y_name] = misc.reindex(xy_df[y_name], po[xy_joinname])

        if "from_epsg" in add_xy and "to_epsg" in add_xy:
            # deferred import: pyproj only needed when reprojecting
            import pyproj
            p1 = pyproj.Proj('+init=epsg:%d' % add_xy["from_epsg"])
            p2 = pyproj.Proj('+init=epsg:%d' % add_xy["to_epsg"])
            x2, y2 = pyproj.transform(p1, p2, po[x_name].values,
                                      po[y_name].values)
            po[x_name], po[y_name] = x2, y2

    po.to_csv(self.parcel_indicator_file, index_label="development_id")
def ln_pop_within_20min(zones, t_data_dist20):
    """Log1p of zonal population reachable within 20 minutes, per building."""
    bldgs = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                              columns=['zone_id'])
    travel = t_data_dist20.to_frame()
    # attach destination-zone population to each origin/destination pair
    travel.loc[:, 'attr'] = zones.zonal_pop[t_data_dist20.to_zone_id].values
    pop_in_range = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(pop_in_range, bldgs.zone_id).apply(np.log1p)
def parcel_average_price(use):
    """Average price for `use` from the nodes table, broadcast to parcels.

    Returns a zero Series when the network aggregations have not been
    computed yet (empty nodes table) or the requested use column is missing.
    """
    nodes = orca.get_table('nodes')
    parcels = orca.get_table('parcels')
    # `use not in` replaces the non-idiomatic `not use in` (PEP 8);
    # both guards share the same zero-Series fallback
    if len(nodes.index) == 0 or use not in nodes.columns:
        return pd.Series(0, parcels.index)
    return misc.reindex(nodes[use], parcels.node_id)
def empden_zone_sector(sector, bzone_id):
    """Employment density (jobs of `sector` per acre) of a zone, on buildings."""
    # non-interaction
    from variables_zones import number_of_jobs_of_sector
    zones = orca.get_table('zones')
    jobs = orca.get_table('jobs')
    density = number_of_jobs_of_sector(sector, zones, jobs) / zones.acres
    # zero out inf/nan produced by zero-acre zones
    density[~np.isfinite(density)] = 0
    return misc.reindex(density, bzone_id)
def ave_sqft_per_unit(parcels, zones, settings):
    """Zone-average unit sqft on parcels, with optional clips from settings."""
    sqft = misc.reindex(zones.ave_unit_sqft, parcels.zone_id)

    bounds = settings.get("ave_sqft_per_unit_clip", None)
    if bounds is not None:
        sqft = sqft.clip(lower=bounds['lower'], upper=bounds['upper'])

    # This is a fun feature that lets you set max dua for new construction
    # based on the dua (as an indicator of density and what part of the
    # city we are in).  Example use in the YAML:
    #
    #   clip_sqft_per_unit_based_on_dua:
    #     - threshold: 50
    #       max: 1000
    #     - threshold: 100
    #       max: 900
    #     - threshold: 150
    #       max: 800
    dua_rules = settings.get("clip_sqft_per_unit_based_on_dua", None)
    if dua_rules is not None:
        for rule in dua_rules:
            sqft[parcels.max_dua >= rule["threshold"]] = rule["max"]

    return sqft
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of median household income by jurisdiction, mapped to parcels."""
    frame = orca.merge_tables(
        "households", [households, buildings, parcels_geography],
        columns=["jurisdiction_id", "income"])
    med_income = frame.groupby(frame.jurisdiction_id).income.quantile(.5)
    mapped = misc.reindex(med_income, parcels_geography.jurisdiction_id)
    # unmatched parcels get the overall median
    return mapped.reindex(parcels.index).fillna(
        med_income.median()).apply(np.log1p)
def max_far(parcels, zoning):
    # Maximum floor-area ratio per parcel: the zoning max_far scaled by the
    # developable share of the parcel.
    sr = misc.reindex(zoning.max_far, parcels.zoning_id)
    sr = sr*parcels.proportion_developable
    df = pd.DataFrame({'max_far':sr.values}, index = sr.index.values)
    # NOTE(review): copying the index into a column means drop_duplicates
    # removes rows duplicated in BOTH index and value - this only has an
    # effect if the parcel index itself contains duplicates; confirm that
    # deduplicating a non-unique index was the intent here.
    df['index'] = df.index.values
    df = df.drop_duplicates()
    del df['index']
    df.index.name = 'parcel_id'
    return df.max_far
def move_jobs_from_portola_to_san_mateo_county(parcels, buildings, jobs_df):
    """Rebalance jobs so Portola Valley keeps at most NUM_IN_PORTOLA jobs.

    Excess Portola Valley jobs are reassigned to randomly sampled buildings
    currently holding San Mateo County jobs.  Mutates and returns jobs_df.
    """
    # need to move jobs from portola valley to san mateo county
    NUM_IN_PORTOLA = 1500
    juris = misc.reindex(
        parcels.juris, misc.reindex(buildings.parcel_id, jobs_df.building_id))

    # find jobs in portola valley to move; clamp at zero so we don't crash
    # (negative sample size) when Portola is already at/below its target
    portola = jobs_df[juris == "Portola Valley"]
    n_to_move = max(len(portola) - NUM_IN_PORTOLA, 0)
    move = portola.sample(n_to_move)

    # find places in san mateo to which to move them; sample with
    # replacement if there are fewer destination jobs than movers
    san_mateo = jobs_df[juris == "San Mateo County"]
    move_to = san_mateo.sample(len(move),
                               replace=len(san_mateo) < len(move))

    jobs_df.loc[move.index, "building_id"] = move_to.building_id.values

    return jobs_df
def ave_sqft_per_unit(parcels, nodes, settings):
    """Network-average unit sqft on parcels; empty Series before nodes exist."""
    if len(nodes) == 0:
        # network aggregations not generated yet
        return pd.Series(index=parcels.index)
    sqft = misc.reindex(nodes.ave_sqft_per_unit, parcels.node_id)
    bounds = settings.get("ave_sqft_per_unit_clip", None)
    if bounds is not None:
        sqft = sqft.clip(lower=bounds['lower'], upper=bounds['upper'])
    return sqft
def building_purchase_price_sqft():
    """Residential purchase price per sqft on parcels.

    Uses the network (nodes) residential price aggregation, scaled down in
    relation to the Bay Area via the RS Means metro scaling factor.
    """
    # (removed dead commented-out LUZ-quantile variant that this
    # node-based aggregation replaced)
    s = misc.reindex(orca.get_table('nodes')['residential'],
                     orca.get_table('parcels').node_id)
    return s * .81  # RS Means metro scaling factor vs Bay Area
def abstract_within_walking_distance_parcels(attribute_name, parcels,
                                             gridcells, settings,
                                             walking_radius=None, **kwargs):
    """Sum of `attribute_name` within walking distance, broadcast to parcels."""
    # aggregate the parcel attribute up to gridcells
    per_gridcell = parcels[attribute_name].groupby(
        parcels.grid_id).sum().reindex(gridcells.index).fillna(0)
    radius = walking_radius or settings.get('cell_walking_radius', 600)
    smoothed = abstract_within_walking_distance_gridcells(
        per_gridcell, gridcells,
        cell_size=settings.get('cell_size', 150),
        walking_distance_circle_radius=radius,
        mode=settings.get("wwd_correlate_mode", "reflect"),
        **kwargs)
    res = misc.reindex(smoothed, parcels.grid_id)
    # TODO: this step should not be needed if all parcels have an
    # existing gridcell assigned
    res[np.isnan(res)] = 0
    return res
def accessory_units(year, buildings, parcels):
    """Add this year's scheduled accessory units to random residential buildings."""
    # per-jurisdiction unit counts for this simulation year
    add_units = pd.read_csv("data/accessory_units.csv",
                            index_col="juris")[str(year)]

    juris_of_building = misc.reindex(parcels.juris, buildings.parcel_id)
    res_buildings = juris_of_building[buildings.general_type == "Residential"]

    chosen = groupby_random_choice(res_buildings, add_units)
    # how many units each chosen building receives
    chosen = pd.Series(chosen.index).value_counts()
    buildings.local.loc[chosen.index, "residential_units"] += chosen.values
def ln_emp_sector5_within_20min(t_data_dist20):
    """Log1p of sector-5 employment reachable within 20 minutes, per building."""
    bldgs = orca.merge_tables('buildings', tables=['buildings', 'parcels'],
                              columns=['zone_id'])
    est = orca.get_table('establishments').to_frame(
        columns=['sector_id_six', 'zone_id', 'employees'])
    est = est.loc[est.sector_id_six == 5]
    zonal_emp = est.groupby('zone_id').employees.sum()
    travel = t_data_dist20.to_frame()
    travel.loc[:, 'attr'] = zonal_emp[t_data_dist20.to_zone_id].values
    emp_in_range = travel.groupby(level=0).attr.apply(np.sum)
    return reindex(emp_in_range, bldgs.zone_id).apply(np.log1p)
def juris_ave_income(households, buildings, parcels_geography, parcels):
    """Log1p of median household income by jurisdiction, on parcels."""
    # frame of income and jurisdiction for each household
    frame = orca.merge_tables(
        "households", [households, buildings, parcels_geography],
        columns=["jurisdiction_id", "income"])
    # median income by jurisdiction
    med = frame.groupby(frame.jurisdiction_id).income.quantile(.5)
    # map it to parcels - fill na with median for all areas
    # should probably remove the log transform and do that in the models
    mapped = misc.reindex(med, parcels_geography.jurisdiction_id)
    return mapped.reindex(parcels.index).fillna(med.median()).apply(np.log1p)
def parcel_average_price(use, quantile=.5):
    """Average price for `use` on parcels.

    Residential uses a zone aggregation rather than a network aggregation so
    the quantile of the distribution can be controlled and development
    spreads rather than staying so localized; other uses fall back to the
    network (nodes) aggregation.
    """
    if use == "residential":
        buildings = orca.get_table('buildings')
        parcels = orca.get_table("parcels")
        zone_price = buildings.residential_price[
            buildings.general_type == "Residential"].groupby(
            buildings.zone_id).quantile(.8)
        s = misc.reindex(zone_price, parcels.zone_id).clip(150, 1250)
        return s / parcels.cost_shifters * parcels.price_shifters

    if 'nodes' not in orca.list_tables():
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def parcel_average_price(use, quantile=.5):
    """Average price for `use` on parcels, with shifters for residential."""
    if use == "residential":
        # get node price average and put it on parcels
        s = misc.reindex(orca.get_table('nodes')[use],
                         orca.get_table('parcels').node_id)
        parcels = orca.get_table("parcels")
        # apply cost / price / taz2 shifters
        s = s / parcels.cost_shifters * \
            parcels.price_shifters * parcels.taz2_price_shifters
        # just to make sure we're in a reasonable range
        return s.fillna(0).clip(150, 1250)

    if 'nodes' not in orca.list_tables():
        # just to keep from erroring
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def allocate_jobs(baseyear_taz_controls, settings, buildings, parcels):
    """Disaggregate TAZ employment controls into jobs and assign buildings.

    Expands the per-sector TAZ totals into one row per job, then randomly
    assigns each job to a building in its TAZ weighted by sqft (job_spaces
    overfill is resolved downstream).  Returns a DataFrame with sector_id,
    empsix, taz and building_id columns.
    """
    # this does a new assignment from the controls to the buildings

    # first disaggregate the job totals
    sector_map = settings["naics_to_empsix"]
    jobs = []
    for taz, row in baseyear_taz_controls.local.iterrows():
        for sector_col, num in row.iteritems():
            # not a sector total
            if not sector_col.startswith("emp_sec"):
                continue
            # get integer sector id
            sector_id = int(''.join(c for c in sector_col if c.isdigit()))
            sector_name = sector_map[sector_id]
            jobs += [[sector_id, sector_name, taz, -1]] * int(num)

    df = pd.DataFrame(jobs, columns=[
        'sector_id', 'empsix', 'taz', 'building_id'])

    zone_id = misc.reindex(parcels.zone_id, buildings.parcel_id)

    # just do random assignment weighted by job spaces - we'll then
    # fill in the job_spaces if overfilled in the next step (code
    # has existed in urbansim for a while)
    for taz, cnt in df.groupby('taz').size().iteritems():
        potential_add_locations = buildings.non_residential_sqft[
            (zone_id == taz) &
            (buildings.non_residential_sqft > 0)]
        if len(potential_add_locations) == 0:
            # if no non-res buildings, put jobs in res buildings
            potential_add_locations = buildings.building_sqft[
                zone_id == taz]
        weights = potential_add_locations / potential_add_locations.sum()
        buildings_ids = potential_add_locations.sample(
            cnt, replace=True, weights=weights)
        # use .loc instead of chained indexing: df["building_id"][mask] = ...
        # can silently assign into a temporary copy, leaving building_id -1
        df.loc[df.taz == taz, "building_id"] = buildings_ids.index.values

    s = zone_id.loc[df.building_id].value_counts()
    # assert that we at least got the total employment right after assignment
    assert_series_equal(baseyear_taz_controls.emp_tot, s)

    return df
def ln_income_x_average_resunit_size(households, buildings, parcels):
    """Zone mean ln(income) times zone mean unit sqft, on buildings."""
    # zone of each building, via its parcel
    bldg_zone = pd.Series(index=buildings.index)
    bldg_zone.loc[:] = parcels.zone_id.loc[buildings.parcel_id].values
    # zone of each household, via its building
    hh_zone = pd.Series(index=households.index)
    hh_zone.loc[:] = bldg_zone.loc[households.building_id].values
    mean_ln_income = households.ln_income.groupby(hh_zone).mean()
    mean_unit_size = buildings.sqft_per_unit.groupby(bldg_zone).mean()
    interaction = mean_ln_income * mean_unit_size
    return reindex(interaction, bldg_zone)
def ave_sqft_per_unit(parcels, zones, settings):
    """Zone-average unit sqft on parcels, with optional clipping rules."""
    result = misc.reindex(zones.ave_unit_sqft, parcels.zone_id)

    bounds = settings.get("ave_sqft_per_unit_clip", None)
    if bounds is not None:
        result = result.clip(lower=bounds['lower'], upper=bounds['upper'])

    # optional density-based caps on unit size
    dua_caps = settings.get("clip_sqft_per_unit_based_on_dua", None)
    if dua_caps is not None:
        for cap in dua_caps:
            result[parcels.max_dua >= cap["threshold"]] = cap["max"]

    return result
def parcel_avg_price(use):
    """Zone mean price per sqft for `use`, broadcast to parcels."""
    bldgs = orca.merge_tables(
        'buildings', tables=['buildings', 'parcels'],
        columns=['unit_price_residential', 'building_type_id',
                 'residential_sqft', 'zone_id', 'unit_price_non_residential'])
    use_btype = orca.get_injectable('use_btype')
    in_use = np.in1d(bldgs.building_type_id, use_btype[use])
    if use == 'residential':
        # if use is residential translate unit price to price per sqft
        price = (bldgs.unit_price_residential.loc[in_use] /
                 bldgs.residential_sqft.loc[in_use]).groupby(
                     bldgs.zone_id).mean()
    else:
        price = bldgs.unit_price_non_residential.loc[in_use].groupby(
            bldgs.zone_id).mean()
    return misc.reindex(price, orca.get_table('parcels').zone_id)
def percent_younghead_x_younghead(buildings, households, zones, parcels):
    """Zone percent young-headed households times zone HH count, on buildings."""
    # zone of each building, via its parcel
    bldg_zone = pd.Series(index=buildings.index)
    bldg_zone.loc[:] = parcels.zone_id.loc[buildings.parcel_id].values
    # zone of each household, via its building
    hh_zone = pd.Series(index=households.index)
    hh_zone.loc[:] = bldg_zone.loc[households.building_id].values
    hh_count = households.age_of_head.groupby(hh_zone).size()
    interaction = zones.percent_younghead * hh_count
    return reindex(interaction, bldg_zone)
def zoning_allowed_uses(store, parcels):
    """Boolean table of development types allowed on each parcel by zoning."""
    parcels_allowed = store['zoning_allowed_uses']
    parcels = orca.get_table('parcels').to_frame(columns=['zoning_id', ])
    allowed_df = pd.DataFrame(index=parcels.index)
    for devtype in np.unique(parcels_allowed.development_type_id):
        # rows of the zoning table that permit this development type
        devtype_allowed = parcels_allowed[
            parcels_allowed.development_type_id == devtype].set_index(
            'zoning_id')
        allowed = misc.reindex(devtype_allowed.development_type_id,
                               parcels.zoning_id)
        flags = pd.DataFrame(index=allowed.index)
        flags['allowed'] = False
        # parcels whose zoning matched get True
        flags[~allowed.isnull()] = True
        allowed_df[devtype] = flags.allowed
    return allowed_df
def zoning_allowed_uses(store, parcels):
    """Boolean table of allowed development types per parcel."""
    zoning_allowed_uses_df = store['zoning_allowed_uses']
    parcels = parcels.to_frame(columns=['zoning_id', ])
    allowed_df = pd.DataFrame(index=parcels.index)
    for devtype in np.unique(zoning_allowed_uses_df.index.values):
        # rows of the zoning table that permit this development type,
        # re-keyed by zoning_id
        devtype_allowed = zoning_allowed_uses_df.loc[
            zoning_allowed_uses_df.index == devtype].reset_index().set_index(
            'zoning_id')
        allowed = misc.reindex(devtype_allowed.development_type_id,
                               parcels.zoning_id)
        flags = pd.DataFrame(data=False, index=allowed.index,
                             columns=['allowed'])
        flags[~allowed.isnull()] = True
        allowed_df[devtype] = flags.allowed
    return allowed_df
def sqft_per_job(buildings, building_sqft_per_job, superdistricts,
                 taz_geography):
    """Sqft per job for each building, scaled by its superdistrict's factor.

    The factor changes all sqft per job according to which superdistrict the
    building is in - so denser areas can have lower sqft per job.  It is a
    simple multiply: 1.1 increases sqft per job by 10%, .9 decreases it by
    10%.
    """
    base = buildings.building_type.fillna("O").map(building_sqft_per_job)
    superdistrict = misc.reindex(
        taz_geography.superdistrict, buildings.zone_id)
    factor = superdistrict.map(superdistricts.sqft_per_job_factor)
    return base * factor
def cnml(parcels, non_mandatory_accessibility, accessibilities_segmentation):
    # Weighted non-mandatory accessibility measure per parcel; parcels whose
    # subzone has no accessibility record get -1.
    nmand_acc = non_mandatory_accessibility.local
    acc_seg = accessibilities_segmentation.local
    cols_to_sum = []
    # NOTE(review): the loop below writes the rescaled columns back into the
    # orca table's local frame in place - confirm that calling this twice is
    # not meant to re-scale already-scaled values.
    for col in nmand_acc.columns[~nmand_acc.columns.isin(
            ['destChoiceAlt', 'taz', 'subzone', 'weighted_sum'])]:
        if col in acc_seg.columns:
            # min-shift then scale; 0.0175 looks like a calibration
            # constant - TODO confirm its source
            nmand_acc[col] = ((nmand_acc[col] - nmand_acc[col].min()) /
                              0.0175) * acc_seg.loc[0, col]
            cols_to_sum.append(col)
    nmand_acc['weighted_sum'] = nmand_acc[cols_to_sum].sum(axis=1)
    df = misc.reindex(nmand_acc.weighted_sum, parcels.subzone)
    return df.reindex(parcels.index).fillna(-1)
def parcel_average_price(use, quantile=.5):
    """Average price for `use` on parcels.

    I'm testing out a zone aggregation rather than a network aggregation
    because I want to be able to determine the quantile of the distribution;
    I also want more spreading in the development and not keep it localized.
    Residential prices are node averages scaled by 1.3 and adjusted by the
    cost/price shifters, then clipped to a reasonable range.
    """
    if use == "residential":
        # get node price average and put it on parcels
        s = misc.reindex(orca.get_table('nodes')[use],
                         orca.get_table('parcels').node_id) * 1.3
        # apply shifters
        cost_shifters = orca.get_table("parcels").cost_shifters
        price_shifters = orca.get_table("parcels").price_shifters
        s = s / cost_shifters * price_shifters
        # just to make sure
        s = s.fillna(0).clip(150, 1250)
        return s

    if 'nodes' not in orca.list_tables():
        return pd.Series(0, orca.get_table('parcels').index)

    return misc.reindex(orca.get_table('nodes')[use],
                        orca.get_table('parcels').node_id)
def semmcd(households, buildings):
    """SEMCOG minor civil division of each household, via its building."""
    values = buildings.semmcd
    return misc.reindex(values, households.building_id)
def b_city_id(households, buildings):
    """City id of each household, via its building."""
    values = buildings.b_city_id
    return misc.reindex(values, households.building_id)
def large_area_id(group_quarters, buildings):
    """Large-area id of each group-quarters record, via its building."""
    values = buildings.large_area_id
    return misc.reindex(values, group_quarters.building_id)
def semmcd(group_quarters, buildings):
    """SEMCOG minor civil division of each group-quarters record."""
    values = buildings.semmcd
    return misc.reindex(values, group_quarters.building_id)
def b_city_id(group_quarters, buildings):
    """City id of each group-quarters record, via its building."""
    values = buildings.b_city_id
    return misc.reindex(values, group_quarters.building_id)
def zone_id(jobs, buildings):
    """Zone id of each job, via its building."""
    values = buildings.zone_id
    return misc.reindex(values, jobs.building_id)
def semmcd(households, persons):
    """SEMCOG minor civil division of each person, via their household."""
    values = households.semmcd
    return misc.reindex(values, persons.household_id)
def subregion(taz_geography, parcels):
    """Subregion of each parcel, via its TAZ."""
    values = taz_geography.subregion
    return misc.reindex(values, parcels.zone_id)
def vmt_code(parcels, vmt_fee_categories):
    """Residential VMT fee category of each parcel, via its zone."""
    categories = vmt_fee_categories.res_cat
    return misc.reindex(categories, parcels.zone_id)
def zone_id(residential_units, buildings):
    """Zone id of each residential unit, via its building."""
    values = buildings.zone_id
    return misc.reindex(values, residential_units.building_id)
def whatnot_id(households, buildings):
    """Whatnot id of each household, via its building."""
    values = buildings.whatnot_id
    return misc.reindex(values, households.building_id)
def zone_id(buildings, parcels):
    """Zone id of each building, via its parcel."""
    values = parcels.zone_id
    return misc.reindex(values, buildings.parcel_id)
def y(households, buildings):
    """Y coordinate of each household, via its building."""
    values = buildings.y
    return misc.reindex(values, households.building_id)
def large_area_id(households, persons):
    """Large-area id of each person, via their household."""
    values = households.large_area_id
    return misc.reindex(values, persons.household_id)
def nodeid_drv(households, buildings):
    """Drive-network node id of each household, via its building."""
    values = buildings.nodeid_drv
    return misc.reindex(values, households.building_id)
def zone_id(group_quarters, buildings):
    """Zone id of each group-quarters record, via its building."""
    values = buildings.zone_id
    return misc.reindex(values, group_quarters.building_id)
def b_zone_id(persons, households):
    """Building zone id of each person, via their household."""
    values = households.b_zone_id
    return misc.reindex(values, persons.household_id)
def node_id(households, buildings):
    """Network node id of each household, via its building."""
    values = buildings.node_id
    return misc.reindex(values, households.building_id)
def whatnot_id(households, persons):
    """Whatnot id of each person, via their household."""
    values = households.whatnot_id
    return misc.reindex(values, persons.household_id)
def b_city_id(households, persons):
    """Building city id of each person, via their household."""
    values = households.b_city_id
    return misc.reindex(values, persons.household_id)
def ave_unit_size(parcels, buildings):
    """Zonal mean sqft-per-unit on parcels; 1500 where a zone has no data."""
    zone_of_building = reindex(parcels.zone_id, buildings.parcel_id)
    zonal_mean = buildings.sqft_per_unit.groupby(zone_of_building).mean()
    return pd.Series(zonal_mean[parcels.zone_id].values,
                     index=parcels.index).fillna(1500)
def whatnot_id(group_quarters, buildings):
    """Whatnot id of each group-quarters record, via its building."""
    values = buildings.whatnot_id
    return misc.reindex(values, group_quarters.building_id)
def node_id(buildings, parcels):
    """Network node id of each building, via its parcel."""
    values = parcels.node_id
    return misc.reindex(values, buildings.parcel_id)
def unit_lot_size(buildings, parcels):
    """Parcel size per residential unit (0-unit buildings count as 1 unit)."""
    parcel_size = misc.reindex(parcels.parcel_size, buildings.parcel_id)
    units = buildings.residential_units.replace(0, 1)
    return parcel_size / units
def lot_size_per_unit(buildings, parcels):
    """Lot size per unit of each building, via its parcel."""
    values = parcels.lot_size_per_unit
    return misc.reindex(values, buildings.parcel_id)
def parcel_average_price(use):
    """Zone-level price for `use`, broadcast to parcels."""
    zone_prices = orca.get_table('zones_prices')[use]
    return misc.reindex(zone_prices, orca.get_table('parcels').zone_id)
def node_id(jobs, buildings):
    """Network node id of each job, via its building."""
    values = buildings.node_id
    return misc.reindex(values, jobs.building_id)
def ave_unit_size(parcels, zones):
    """Zone-average unit sqft of each parcel."""
    values = zones.ave_unit_sqft
    return misc.reindex(values, parcels.zone_id)
def check_unit_ids_match_building_ids(households, residential_units):
    """Sanity check that each household's building matches its unit's building.

    Looks up the building of each household's residential unit and asserts it
    agrees with the household's own building_id.
    """
    # print() with one argument is identical under Python 2 and is
    # Python 3 compatible (the old `print expr` statement is not)
    print("Check unit ids and building ids match")
    building_ids = misc.reindex(
        residential_units.building_id, households.unit_id)
    assert_series_equal(building_ids, households.building_id, 25000)
def whatnot_id(jobs, buildings):
    """Whatnot id of each job, via its building."""
    values = buildings.whatnot_id
    return misc.reindex(values, jobs.building_id)