def subsidized_residential_feasibility(
        parcels, settings,
        add_extra_columns_func, parcel_sales_price_sqft_func,
        parcel_is_allowed_func, parcels_geography):
    """
    Run a residential-only feasibility pass, apply the profit policy
    modifications and dump the result to CSV.

    The feasibility options are a copy of ``settings['feasibility']`` with
    ``only_built`` forced to False and ``forms_to_test`` set to
    ``["residential"]``.  The modified table is registered as the orca
    table "feasibility" and a flattened copy is written to
    ``runs/run<run_number>_feasibility_<year>.csv``.

    ``add_extra_columns_func`` is accepted but not used here.
    """
    feas_kwargs = settings['feasibility'].copy()
    feas_kwargs["only_built"] = False
    feas_kwargs["forms_to_test"] = ["residential"]

    # step 1
    utils.run_feasibility(
        parcels,
        parcel_sales_price_sqft_func,
        parcel_is_allowed_func,
        **feas_kwargs)

    # flatten the multiindex columns that come back from feasibility
    flat = orca.get_table("feasibility").to_frame()
    flat = flat.stack(level=0).reset_index(level=1, drop=True)

    # join to parcels_geography for filtering
    flat = flat.join(parcels_geography.to_frame())

    # put the ("residential", <column>) multiindex back
    flat.columns = pd.MultiIndex.from_tuples(
        [("residential", name) for name in flat.columns])

    orca.add_table(
        "feasibility", policy_modifications_of_profit(flat, parcels))

    # re-read the registered table and write a flattened copy to disk
    dump = orca.get_table("feasibility").to_frame()
    dump = dump.stack(level=0).reset_index(level=1, drop=True)
    out_path = "runs/run{}_feasibility_{}.csv".format(
        orca.get_injectable("run_number"),
        orca.get_injectable("year"))
    dump.to_csv(out_path)
def empden_zone_sector(sector, bzone_id):
    """
    Jobs of ``sector`` per zone divided by ``zones.acres``, reindexed to
    ``bzone_id``.  Non-finite results are replaced with 0.
    """
    # non-interaction
    from variables_zones import number_of_jobs_of_sector

    zones = orca.get_table('zones')
    sector_jobs = number_of_jobs_of_sector(
        sector, zones, orca.get_table('jobs'))
    density = sector_jobs / zones.acres

    # division by zero / missing acreage would otherwise leak inf or nan
    not_finite = ~np.isfinite(density)
    density[not_finite] = 0
    return misc.reindex(density, bzone_id)
def parcel_is_allowed(form):
    """
    Return a boolean Series telling, per parcel, whether ``form`` may be
    built.

    Zoning is stored by building type, so the form is translated through
    ``settings["form_to_btype"]``.
    """
    settings = orca.get_injectable("settings")
    form_to_btype = settings["form_to_btype"]

    baseline = orca.get_table("zoning_baseline")
    scenario = orca.get_table("zoning_scenario")
    parcels = orca.get_table("parcels")
    btypes = form_to_btype[form]

    # first, it's allowed if any building type matching the form is allowed
    allowed = pd.Series(0, index=parcels.index)
    for btype in btypes:
        allowed |= baseline[btype]

    # then scenario values (from the add_bldg / drop_bldg columns) override
    # the baseline wherever they are specified.  There is a dependence on
    # ordering: values that occur LAST in the form_to_btype mapping win.
    for btype in btypes:
        allowed = scenario[btype].combine_first(allowed)

    # when there is too much retail in some jurisdictions, retail zoning
    # can simply be switched off for them via settings
    juris_key = "eliminate_retail_zoning_from_juris"
    if form == "retail" and juris_key in settings:
        allowed *= ~parcels.juris.isin(settings[juris_key])

    return allowed.astype("bool")
def parcel_is_allowed(form):
    """
    Return the ``zoning_allowed_uses`` column that corresponds to ``form``.

    Forms without an entry in the lookup below (including 'residential',
    whose combined rule is currently disabled) are allowed on every parcel.
    """
    parcels = orca.get_table('parcels')
    uses = orca.get_table('zoning_allowed_uses').to_frame()

    # form name -> column of the zoning_allowed_uses table
    use_column_by_form = {
        'sf_detached': 19,
        'sf_attached': 20,
        'mf_residential': 21,
        'light_industrial': 2,
        'heavy_industrial': 3,
        'office': 4,
        'retail': 5,
    }

    if form in use_column_by_form:
        return uses[use_column_by_form[form]]

    everywhere = pd.DataFrame(index=parcels.index)
    everywhere['allowed'] = True
    return everywhere.allowed
def conditional_upzone(scenario, scenario_inputs, attr_name, upzone_name):
    """
    Combine a baseline zoning attribute with its scenario upzoning.

    Parameters
    ----------
    scenario : str
        The name of the active scenario (set to "baseline" if no scenario
        zoning)
    scenario_inputs : dict
        Dictionary of scenario options - keys are scenario names and values
        are also dictionaries of key-value pairs for scenario inputs.  Right
        now "zoning_table_name" should be set to the table that contains the
        scenario based zoning for that scenario
    attr_name : str
        The name of the attribute in the baseline zoning table
    upzone_name : str
        The name of the attribute in the scenario zoning table

    Returns
    -------
    The new zoning per parcel which is increased if the scenario based
    zoning is higher than the baseline zoning
    """
    baseline_table = orca.get_table(
        scenario_inputs["baseline"]["zoning_table_name"])
    baseline_attr = baseline_table[attr_name]

    if scenario == "baseline":
        return baseline_attr

    scenario_table = orca.get_table(
        scenario_inputs[scenario]["zoning_table_name"])
    upzone = scenario_table[upzone_name].dropna()

    # row-wise max, so the scenario can only raise the baseline value
    combined = pd.concat([baseline_attr, upzone], axis=1)
    return combined.max(skipna=True, axis=1)
def slr_remove_dev(buildings, destroy_parcels, year, parcels, households, jobs):
    """
    Demolish every building that sits on a parcel in ``destroy_parcels``
    and record which households and jobs were unplaced by that demolition.

    Side effects (all through orca):
    - table "slr_demolish": the buildings being demolished
    - injectable "hh_unplaced_slr": households unplaced by this step
    - injectable "jobs_unplaced_slr": jobs unplaced by this step
    - table "buildings": the surviving buildings

    ``year`` and ``parcels`` are accepted but not used here.
    """
    slr_demolish = buildings.local[
        buildings.parcel_id.isin(destroy_parcels.index)]
    orca.add_table("slr_demolish", slr_demolish)

    # print() with a single pre-formatted string behaves the same on
    # Python 2 and Python 3
    print("Demolishing %d buildings" % len(slr_demolish))

    # remember who was already unplaced, so only agents unplaced by this
    # demolition are reported below
    households = households.to_frame()
    hh_unplaced = households[households["building_id"] == -1]
    jobs = jobs.to_frame()
    jobs_unplaced = jobs[jobs["building_id"] == -1]

    l1 = len(buildings)
    buildings = utils._remove_developed_buildings(
        buildings.to_frame(buildings.local_columns),
        slr_demolish,
        unplace_agents=["households", "jobs"])

    # the helper re-registers the agent tables, so read them back
    households = orca.get_table("households").to_frame()
    hh_unplaced_slr = households[households["building_id"] == -1]
    hh_unplaced_slr = hh_unplaced_slr[
        ~hh_unplaced_slr.index.isin(hh_unplaced.index)]
    orca.add_injectable("hh_unplaced_slr", hh_unplaced_slr)

    jobs = orca.get_table("jobs").to_frame()
    jobs_unplaced_slr = jobs[jobs["building_id"] == -1]
    jobs_unplaced_slr = jobs_unplaced_slr[
        ~jobs_unplaced_slr.index.isin(jobs_unplaced.index)]
    orca.add_injectable("jobs_unplaced_slr", jobs_unplaced_slr)

    orca.add_table("buildings", buildings)
    buildings = orca.get_table("buildings")
    print("Demolished %d buildings" % (l1 - len(buildings)))
def effective_max_far(zoning_baseline, parcels, scenario):
    """
    Effective maximum FAR per parcel.

    The starting value is the smaller of the zoned ``max_far`` and the FAR
    implied by ``max_height``.  For a non-baseline scenario it is raised to
    ``zoning_scenario.far_up`` where that is larger, then lowered to
    ``zoning_scenario.far_down`` where that is smaller.
    """
    far_from_height = \
        (zoning_baseline.max_height / HEIGHT_PER_STORY) * \
        PARCEL_USE_EFFICIENCY

    far = pd.concat(
        [zoning_baseline.max_far, far_from_height], axis=1).min(axis=1)

    # NOTE(review): the baseline branch returns without the
    # reindex/fillna/astype applied on the scenario path - confirm intended.
    if scenario == "baseline":
        return far

    scenario_zoning = orca.get_table("zoning_scenario")

    # take the max far IFF the upzone value is greater than the current
    # value, i.e. don't let the upzoning operation accidentally downzone
    far = pd.concat([far, scenario_zoning.far_up], axis=1).max(axis=1)

    # take the min far IFF the downzone value is less than the current
    # value, i.e. don't let the downzoning operation accidentally upzone
    far = pd.concat([far, scenario_zoning.far_down], axis=1).min(axis=1)

    return far.reindex(parcels.index).fillna(0).astype('float')
def _remove_developed_buildings(old_buildings, new_buildings, unplace_agents):
    """
    Drop the buildings that share a parcel with ``new_buildings`` and
    unplace the agents that lived or worked in them.

    Parameters
    ----------
    old_buildings : pandas.DataFrame
        Existing buildings; must have a ``parcel_id`` column.
    new_buildings : pandas.DataFrame
        Buildings being added; must have a ``parcel_id`` column.
    unplace_agents : list of str
        Names of orca tables whose ``building_id`` is set to -1 for agents
        located in a dropped building.  Each table is re-registered.

    Returns
    -------
    pandas.DataFrame
        ``old_buildings`` without the redeveloped rows.

    The dropped rows are accumulated in the orca table "dropped_buildings".
    """
    redev_buildings = old_buildings.parcel_id.isin(new_buildings.parcel_id)
    n_before = len(old_buildings)
    drop_buildings = old_buildings[redev_buildings]

    if "dropped_buildings" in orca.orca._TABLES:
        prev_drops = orca.get_table("dropped_buildings").to_frame()
        orca.add_table("dropped_buildings",
                       pd.concat([drop_buildings, prev_drops]))
    else:
        orca.add_table("dropped_buildings", drop_buildings)

    old_buildings = old_buildings[np.logical_not(redev_buildings)]

    # The original tested (after - before) > 0, which can never be true
    # after a removal, so the message was never printed.
    n_dropped = n_before - len(old_buildings)
    if n_dropped > 0:
        print("Dropped {} buildings because they were redeveloped".format(
            n_dropped))

    for tbl in unplace_agents:
        agents = orca.get_table(tbl)
        cols = list(agents.local_columns)
        if "building_id" not in cols:
            # if it's a unit-level model, need to add building_id
            # explicitly
            cols.append("building_id")
        agents = agents.to_frame(cols)

        displaced_agents = agents.building_id.isin(drop_buildings.index)
        print("Unplaced {} before: {}".format(
            tbl, len(agents.query("building_id == -1"))))
        # .loc writes to the frame itself; the chained form
        # agents.building_id[mask] = -1 may write to a temporary copy
        agents.loc[displaced_agents, "building_id"] = -1
        print("Unplaced {} after: {}".format(
            tbl, len(agents.query("building_id == -1"))))

        orca.add_table(tbl, agents)

    return old_buildings
def parcel_is_allowed(form):
    """
    Per-parcel flag: True when any building type mapped to ``form`` is
    marked 't' in ``zoning_baseline``; parcels without zoning get False.
    """
    form_to_btype = orca.get_injectable("form_to_btype")
    zoning = orca.get_table('zoning_baseline')

    # we have zoning by building type but want
    # to know if specific forms are allowed
    type_flags = []
    for typ in form_to_btype[form]:
        type_flags.append(zoning['type%d' % typ] == 't')

    any_allowed = pd.concat(type_flags, axis=1).max(axis=1)
    parcel_index = orca.get_table('parcels').index
    return any_allowed.reindex(parcel_index).fillna(False)
def func():
    """
    Jobs with ``sector_id`` counted per building, divided by the building's
    ``b_total_jobs``; missing ratios are filled with 0.

    ``sector_id`` is read from the enclosing scope, which is not visible
    here.
    """
    jobs_per_building = orca.get_table('buildings').b_total_jobs

    jobs_table = orca.get_table('jobs')
    jobs_df = jobs_table.to_frame(jobs_table.local_columns)

    in_sector = jobs_df.sector_id == sector_id
    sector_counts = jobs_df[in_sector].building_id.value_counts()

    share = sector_counts / jobs_per_building
    return share.fillna(0)
def building_purchase_price_sqft():
    """
    Purchase price per sqft for each parcel: the ``residential`` value of
    the parcel's node, scaled by 0.81.

    An earlier variant (40th percentile of residential building prices by
    LUZ, clipped to 90-700) is disabled.
    """
    node_prices = orca.get_table('nodes')['residential']
    parcel_nodes = orca.get_table('parcels').node_id
    per_parcel = misc.reindex(node_prices, parcel_nodes)

    # In relation to Bay Area via RS Means metro scaling factor
    return per_parcel * .81
def update_annexed(new_buildings):
    """
    Updates the 'city' and 'overlay_id' fields for parcels that get annexed
    during the simulation year. Prints number of developed, rezoned, and
    annexed parcels.

    Parameters
    ----------
    new_buildings: DataFrame
        Table with the buildings that were selected by the developer model.
        A 'rezoned' column is written onto this frame.

    Returns
    -------
    None
    """
    # flag buildings whose zoning differs from the original zoning
    new_buildings['rezoned'] = 0
    was_rezoned = new_buildings.zoning_id != new_buildings.original_zoning_id
    new_buildings.loc[was_rezoned, 'rezoned'] = 1

    # one row per parcel; sorting first means a rezoned row is the one kept
    new_buildings = (
        new_buildings.copy()
        .sort_values('rezoned')
        .groupby('parcel_id', as_index=False)
        .last())

    parcels = orca.get_table('parcels').to_frame(
        ['parcel_id', 'city', 'ugb', 'overlay_id']).reset_index()
    parcels['developed'] = 0
    parcels['annexed'] = 0
    parcels.loc[
        parcels.parcel_id.isin(new_buildings.parcel_id), 'developed'] = 1

    # annexed = developed, inside a UGB, and not yet in any city
    annex_mask = (
        (parcels['developed'] == 1)
        & (parcels['ugb'].notnull())
        & (parcels['city'].isnull()))
    parcels.loc[annex_mask, 'annexed'] = 1
    parcels.loc[parcels.annexed == 1, 'city'] = parcels.ugb

    # keep only overlays whose id changes on annexation
    overlays = orca.get_table('zone_overlay_types').to_frame()
    changes_on_annex = overlays['overlay_id'] != overlays['annexed_overlay_id']
    overlays = overlays[changes_on_annex].copy()

    cols = overlays.columns.drop([
        'overlay_id', 'annexed_overlay_id', 'overlay_combination',
        'cost_in_city', 'cost_in_ugb', 'cost_outside_ugb'
    ])
    for name in cols:
        overlays = overlays.rename(columns={name: name + '_overlay'})

    parcels = parcels.merge(
        overlays, on='overlay_id', how='left').set_index('parcel_id')
    parcels.loc[parcels.annexed == 1, 'overlay_id'] = \
        parcels.annexed_overlay_id

    # per-city totals, taken before the update helpers run
    annexed = parcels[parcels.annexed == 1].copy().groupby(
        'city', as_index=False).sum()

    update_annexed_col(parcels)
    update_city(parcels)
    update_overlay_id(parcels)

    for col in cols:
        col_overlay = col + '_overlay'
        has_override = (
            (parcels.annexed == 1) & (parcels[col_overlay].notnull()))
        parcels.loc[has_override, col] = parcels[col_overlay]
        update_zoning_cols(parcels, col)

    print('Total parcels that will develop: ',
          new_buildings.parcel_id.nunique())
    print('Total rezoned parcels: ', new_buildings.rezoned.sum())
    for city in annexed.city.unique():
        print('Total annexed parcels: ', city, ': ',
              annexed[annexed['city'] == city].annexed.item())
def run(self):
    """
    Run the model step: calculate simulated choices and use them to update
    a column.

    For binary logit, we calculate predicted probabilities and then perform
    a weighted random draw to determine the simulated binary outcomes. This
    is done directly from the fitted parameters, because we can't
    conveniently regenerate a Statsmodels results object from a dictionary
    representation.

    The predicted probabilities and simulated choices are saved to the
    class object for interactive use (`probabilities` and `choices`, with
    type pd.Series) but are not persisted in the dictionary representation
    of the model step.

    Parameters
    ----------
    None

    Returns
    -------
    None

    """
    # TO DO - verify that params are in place for prediction

    df = self._get_data('predict')

    # dmatrices returns (lhs, rhs); only the right-hand-side design matrix
    # is needed for prediction
    dm = patsy.dmatrices(
        data=df,
        formula_like=self.model_expression,
        return_type='dataframe')[1]

    beta_X = np.dot(dm, self.fitted_parameters)

    # Logistic function 1 / (1 + exp(-x)), evaluated through logaddexp.
    # The direct form exp(x) / (1 + exp(x)) becomes inf / inf = nan for
    # large positive x, which silently turns those draws into False.
    probs = np.exp(-np.logaddexp(0, -beta_X))

    rand = np.random.random(len(probs))
    choices = np.less(rand, probs)

    # Save results to the class object (via df to include index)
    df['_probs'] = probs
    self.probabilities = df._probs
    df['_choices'] = choices
    self.choices = df._choices

    # TO DO - generate column if it does not exist (in get_data?)
    colname = self._get_out_column()
    tabname = self._get_out_table()

    # 'nothing' means "leave the column untouched for this outcome".
    # Compare by value: identity comparison against a string literal
    # depends on interning.
    if self.out_value_true != 'nothing':
        df.loc[df._choices == True, colname] = self.out_value_true

    if self.out_value_false != 'nothing':
        df.loc[df._choices == False, colname] = self.out_value_false

    orca.get_table(tabname).update_col_from_series(
        colname, df[colname], cast=True)
def households_transition(households, household_controls, year, settings):
    """
    Run the households transition and print the share of households per
    ``base_income_quartile`` before and after it.

    Returns whatever ``utils.full_transition`` returns.
    """
    s = orca.get_table('households').base_income_quartile.value_counts()
    # a single pre-formatted string prints identically on Python 2 and 3
    print("Distribution by income before:\n{}".format(s / s.sum()))

    ret = utils.full_transition(households,
                                household_controls,
                                year,
                                settings['households_transition'],
                                "building_id")

    # the table is re-fetched so the post-transition households are counted
    s = orca.get_table('households').base_income_quartile.value_counts()
    print("Distribution by income after:\n{}".format(s / s.sum()))
    return ret
def func():
    """
    Copy ``var_to_disaggregate`` from the ``from_geog_name`` table onto the
    rows of the ``to_geog_name`` table, matching on ``from_geog_id_name``;
    unmatched rows get 0.

    All four names are read from the enclosing scope, which is not visible
    here.
    """
    print('Disaggregating {} to {} from {}'.format(var_to_disaggregate,
                                                   to_geog_name,
                                                   from_geog_name))

    source_table = orca.get_table(from_geog_name)
    target_table = orca.get_table(to_geog_name)

    source_values = source_table[var_to_disaggregate]
    target_keys = target_table[from_geog_id_name]
    return misc.reindex(source_values, target_keys).fillna(0)
def get_table(table_name, checkpoint_name=None):
    """
    Return pandas dataframe corresponding to table_name

    if checkpoint_name is None, return the current (most recent) version of
    the table. The table can be a checkpointed table or any registered orca
    table (e.g. function table)

    if checkpoint_name is specified, return table as it was at that
    checkpoint (the most recently checkpointed version of the table at or
    before checkpoint_name)

    Parameters
    ----------
    table_name : str
    checkpoint_name : str or None

    Returns
    -------
    df : pandas.DataFrame

    Raises
    ------
    RuntimeError
        If a checkpoint is requested for a non-checkpointed table, the
        table was never checkpointed or was dropped, the checkpoint does
        not exist, or the table is not part of the checkpoint.
    """
    # orca table not in checkpoints (e.g. a merged table)
    if table_name not in _PIPELINE.last_checkpoint \
            and orca.is_table(table_name):
        if checkpoint_name is not None:
            # the two literals are concatenated, so the first needs its
            # trailing space ("supported for", not "supportedfor")
            raise RuntimeError(
                "get_table: checkpoint_name ('%s') not supported "
                "for non-checkpointed table '%s'"
                % (checkpoint_name, table_name))

        return orca.get_table(table_name).to_frame()

    # if they want current version of table, no need to read from pipeline
    # store
    if checkpoint_name is None:

        if table_name not in _PIPELINE.last_checkpoint:
            raise RuntimeError(
                "table '%s' never checkpointed." % table_name)

        # an empty entry marks a table that was dropped
        if not _PIPELINE.last_checkpoint[table_name]:
            raise RuntimeError("table '%s' was dropped." % table_name)

        # return orca.get_table(table_name).local
        return orca.get_table(table_name).to_frame()

    # find the requested checkpoint
    checkpoint = next(
        (x for x in _PIPELINE.checkpoints
         if x['checkpoint_name'] == checkpoint_name),
        None)
    if checkpoint is None:
        raise RuntimeError(
            "checkpoint '%s' not in checkpoints." % checkpoint_name)

    # find the checkpoint that table was written to store
    last_checkpoint_name = checkpoint.get(table_name, None)

    if not last_checkpoint_name:
        raise RuntimeError("table '%s' not in checkpoint '%s'."
                           % (table_name, checkpoint_name))

    # if this version of table is same as current
    if _PIPELINE.last_checkpoint.get(table_name, None) == \
            last_checkpoint_name:
        return orca.get_table(table_name).to_frame()

    return read_df(table_name, last_checkpoint_name)
def test_output_column_autocreation(m):
    """
    Running with an ``out_column`` that does not exist yet must create it
    on the 'obs' table and fill it with the simulated choices.

    """
    m.out_column = 'potato_chips'
    m.run()

    obs = orca.get_table('obs')
    assert 'potato_chips' in obs.columns

    written = obs.to_frame()['potato_chips']
    assert m.choices.equals(written)
def parcel_is_allowed(form):
    """
    Per-parcel flag: True when any building type mapped to ``form`` is
    marked 't' in ``zoning_baseline``; parcels without zoning get False.
    """
    btypes = orca.get_injectable("form_to_btype")[form]
    zoning = orca.get_table('zoning_baseline')

    # we have zoning by building type but want
    # to know if specific forms are allowed
    flags_by_type = pd.concat(
        [zoning['type%d' % typ] == 't' for typ in btypes],
        axis=1)
    allowed = flags_by_type.max(axis=1)

    parcels = orca.get_table('parcels')
    return allowed.reindex(parcels.index).fillna(False)
def building_purchase_price_sqft():
    """
    Purchase price per sqft for each parcel: the ``residential`` value of
    the parcel's node, scaled by 0.81.

    An earlier variant (40th percentile of residential building prices by
    LUZ, clipped to 90-700) is disabled.
    """
    nodes = orca.get_table('nodes')
    parcels = orca.get_table('parcels')
    price_at_parcel = misc.reindex(nodes['residential'], parcels.node_id)

    # In relation to Bay Area via RS Means metro scaling factor
    scaling = .81
    return price_at_parcel * scaling
def choice_model_simulate(location_choice_models):
    """
    Simulate choices with the model registered under ``model_name`` and
    write them to the model's choice column on the ``agents_name`` table.

    ``model_name``, ``choice_function`` and ``agents_name`` are read from
    the enclosing scope, which is not visible here.
    """
    lcm = location_choice_models[model_name]
    simulated = lcm.simulate(choice_function=choice_function)

    n_unplaced = simulated.isnull().sum()
    print('There are {} unplaced agents.'.format(n_unplaced))

    agents_table = orca.get_table(agents_name)
    agents_table.update_col_from_series(
        lcm.choice_column, simulated, cast=True)
def run(self):
    """
    Run the model step: simulate one choice per observation and write the
    choices to an Orca column.

    Alternatives that appear in the estimation data but not in the model
    expression will not be available for simulation.

    Predicted probabilities come from PyLogit. Monte Carlo simulation of
    choices is performed directly. (This functionality will move to
    ChoiceModels.)

    The predicted probabilities and simulated choices are saved to the
    class object for interactive use (`probabilities` with type
    pd.DataFrame, and `choices` with type pd.Series) but are not persisted
    in the dictionary representation of the model step.

    """
    obs_df = self._get_data('predict')
    long_df = self._to_long(obs_df, 'predict')

    n_obs = len(obs_df)
    n_alts = len(self._get_alts())

    # ndarray with the same length as the long-format df, holding the
    # choice probability of each alternative
    probs = self.model.predict(long_df)

    # Generate choices by adapting an approach from UrbanSim MNL
    # https://github.com/UDST/choicemodels/blob/master/choicemodels/mnl.py#L578-L583
    cumulative = probs.reshape((n_obs, n_alts)).cumsum(axis=1)
    draws = np.random.random(n_obs)
    margin = cumulative - draws[:, np.newaxis]

    # Casting (margin + 1.0) to int turns negative margins into 0 and
    # positive ones into 1, so argmax returns the first positive position
    first_hit = np.argmax((margin + 1.0).astype('i4'), axis=1)
    flat_ix = first_hit + (np.arange(n_obs) * n_alts)
    choices = np.take(long_df._alt_id.values, flat_ix)

    # Save results to the class object (via df to include indexes)
    long_df['_probability'] = probs
    self.probabilities = long_df[['_obs_id', '_alt_id', '_probability']]
    obs_df['_choices'] = choices
    self.choices = obs_df._choices

    # Save to Orca: fall back to the choice column when no out column is set
    colname = (self.out_column
               if self.out_column is not None
               else self.choice_column)
    tabname = self._get_out_table()

    orca.get_table(tabname).update_col_from_series(
        colname, obs_df._choices, cast=True)
def conditional_upzone(scenario, attr_name, upzone_name):
    """
    Return the baseline zoning attribute ``attr_name``, raised to the
    scenario's ``upzone_name`` value wherever that is higher.

    Table names come from the "scenario_inputs" injectable; for the
    "baseline" scenario the baseline attribute is returned unchanged.
    """
    scenario_inputs = orca.get_injectable("scenario_inputs")

    baseline_name = scenario_inputs["baseline"]["zoning_table_name"]
    baseline_attr = orca.get_table(baseline_name)[attr_name]

    if scenario == "baseline":
        return baseline_attr

    scenario_name = scenario_inputs[scenario]["zoning_table_name"]
    upzone = orca.get_table(scenario_name)[upzone_name].dropna()

    # row-wise max, so the scenario can only raise the baseline value
    side_by_side = pd.concat([baseline_attr, upzone], axis=1)
    return side_by_side.max(skipna=True, axis=1)
def parcel_average_price(use):
    """
    Average price per parcel for ``use``.

    For "residential" this is the 85th percentile of ``res_price_per_sqft``
    over "Residential" buildings in the parcel's LUZ, clipped to
    [150, 1250].  For any other use it is the node-level ``use`` value at
    the parcel's node.
    """
    if use != "residential":
        return misc.reindex(orca.get_table('nodes')[use],
                            orca.get_table('parcels').node_id)

    buildings = orca.get_table('buildings')
    is_residential = buildings.general_type == "Residential"
    res_prices = buildings.res_price_per_sqft[is_residential]
    luz_price = res_prices.groupby(buildings.luz_id).quantile(.85)

    parcel_luz = orca.get_table('parcels').luz_id
    return misc.reindex(luz_price, parcel_luz).clip(150, 1250)
def households_transition(households, household_controls, year, settings):
    """
    Run the households transition and print the share of households per
    ``base_income_quartile`` before and after it.

    Returns whatever ``utils.full_transition`` returns.
    """
    s = orca.get_table('households').base_income_quartile.value_counts()
    # a single pre-formatted string prints identically on Python 2 and 3
    print("Distribution by income before:\n{}".format(s / s.sum()))

    ret = utils.full_transition(households,
                                household_controls,
                                year,
                                settings['households_transition'],
                                "building_id")

    # the table is re-fetched so the post-transition households are counted
    s = orca.get_table('households').base_income_quartile.value_counts()
    print("Distribution by income after:\n{}".format(s / s.sum()))
    return ret
def func():
    """
    Number of ``agent`` rows per ``geog`` location, 0 where a location has
    none.

    ``agent``, ``geog`` and ``geog_id`` are read from the enclosing scope,
    which is not visible here.
    """
    agents = orca.get_table(agent)
    print('Calculating number of {} for {}'.format(agent, geog))

    counts = agents[geog_id].value_counts()
    location_ids = orca.get_table(geog).index

    # building the Series on the location index leaves NaN for locations
    # without agents, which are then zero-filled
    return pd.Series(data=counts, index=location_ids).fillna(0)
def hlcm_renter_simulate(households, residential_units, aggregations,
                         settings, hlcm_renter_config):
    """
    Simulate the renter household location choice and write the chosen
    unit ids back through ``update_unit_ids``.
    """
    tenure = 'rent'

    # Pre-filter the alternatives to avoid over-pruning (PR 103)
    correct_alternative_filters_sample(residential_units, households, tenure)

    renter_households = orca.get_table('rent_hh')
    rental_units = orca.get_table('rent_units')
    hlcm_simulate(renter_households, rental_units,
                  aggregations, settings, hlcm_renter_config,
                  'rent_equilibration')

    update_unit_ids(households, tenure)
def auto_ownership_simulate(households):
    """
    Generate auto ownership choices for the synthetic pop households.
    The categories are:
    - 0: no vehicle
    - 1: one vehicle
    - 2: two vehicles
    - 3: three or more vehicles

    Adds income-bin dummy columns to 'households' and accessibility columns
    to 'parcels', registers the 'hh_merged' table, then runs the
    'auto_ownership' step with output written to households.cars_alt.
    """
    # income bin dummies
    hh_income = orca.get_table('households').to_frame().income
    income_bins = pd.cut(
        hh_income,
        bins=[0, 20000, 40000, 60000, 80000, 100000, 120000, np.inf],
        labels=['2', '4', '6', '8', '10', '12', '12p'],
        include_lowest=True)
    income_bin_dummies = pd.get_dummies(income_bins, prefix='income')
    for dummy_name in income_bin_dummies.columns:
        orca.add_column(
            'households', dummy_name, income_bin_dummies[dummy_name])

    # load UrbanAccess transit accessibility variables
    parcels = orca.get_table('parcels').to_frame()
    am_acc = pd.read_csv('./data/access_indicators_ampeak.csv',
                         dtype={'block_id': str})
    am_acc.block_id = am_acc.block_id.str.zfill(15)

    merged = parcels.merge(am_acc, how='left', on='block_id')
    # reorder to align with parcels table
    # NOTE(review): merge() returns a fresh integer index, so this reindex
    # only lines up if the parcels index is itself 0..n-1 - confirm.
    parcels_with_acc = merged.reindex(index=parcels.index)

    added_cols = set(parcels_with_acc.columns) - set(parcels)
    for acc_col in added_cols:
        values = parcels_with_acc[acc_col]
        # fill NA with median value
        orca.add_column('parcels', acc_col, values.fillna(values.median()))

    @orca.table(cache=False)
    def hh_merged():
        merged_tables = orca.merge_tables(target='households', tables=[
            'households', 'units', 'buildings', 'parcels',
            'nodessmall', 'nodeswalk'
        ])
        return merged_tables

    step = mm.get_step('auto_ownership')

    # remove filters, specify out table, out column
    step.filters = None
    step.out_table = 'households'
    step.out_column = 'cars_alt'

    step.run()
def households_transition(households, household_controls, year, settings,
                          persons):
    """
    Run the households transition with ``persons`` as a linked table, then
    initialise attributes of the newly added households and persons.

    After the transition:
    - ``households.is_inmigrant`` is 1 for households not present before
    - ``persons.job_id`` is -1 for new persons, then -2 for every person
      whose ``employment_status`` is not > 0
    - ``persons.work_at_home`` is 0 for new persons

    The orca cache is cleared before returning the value of
    ``utils.full_transition``.
    """
    orig_size_hh = households.local.shape[0]
    orig_size_pers = persons.local.shape[0]
    orig_pers_index = persons.index
    orig_hh_index = households.index

    res = utils.full_transition(
        households, household_controls, year,
        settings['households_transition'], "building_id",
        linked_tables={"persons": (persons.local, 'household_id')})

    # print() with a single pre-formatted string behaves the same on
    # Python 2 and Python 3
    print("Net change: %s households" % (
        orca.get_table("households").local.shape[0] - orig_size_hh))
    print("Net change: %s persons" % (
        orca.get_table("persons").local.shape[0] - orig_size_pers))

    # changes to households/persons table are not reflected in local scope
    # need to reset vars to get changes.
    households = orca.get_table('households')
    persons = orca.get_table("persons")

    # need to make some updates to the persons & households table
    is_new_hh = ~households.index.isin(orig_hh_index)
    households.update_col_from_series(
        "is_inmigrant",
        pd.Series(np.where(is_new_hh, 1, 0), index=households.index),
        cast=True)

    is_new_person = ~persons.index.isin(orig_pers_index)

    # new workers dont have jobs yet, set job_id to -1
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(is_new_person, -1, persons.job_id),
                  index=persons.index),
        cast=True)

    # dont know their work at home status yet, set to 0:
    persons.update_col_from_series(
        "work_at_home",
        pd.Series(np.where(is_new_person, 0, persons.work_at_home),
                  index=persons.index),
        cast=True)

    # set non-worker job_id to -2 (job_id is read again here, after the
    # update above)
    persons.update_col_from_series(
        "job_id",
        pd.Series(np.where(persons.employment_status > 0,
                           persons.job_id, -2),
                  index=persons.index),
        cast=True)

    orca.clear_cache()

    return res
def parcel_average_price(use):
    """
    Average price per parcel for ``use``.

    "residential": 85th percentile of ``res_price_per_sqft`` among
    "Residential" buildings, grouped by LUZ and clipped to [150, 1250].
    Anything else: the node-level ``use`` value at the parcel's node.
    """
    if use == "residential":
        buildings = orca.get_table('buildings')
        residential_only = buildings.general_type == "Residential"
        price_by_luz = (
            buildings.res_price_per_sqft[residential_only]
            .groupby(buildings.luz_id)
            .quantile(.85))
        reindexed = misc.reindex(
            price_by_luz, orca.get_table('parcels').luz_id)
        return reindexed.clip(150, 1250)

    node_values = orca.get_table('nodes')[use]
    return misc.reindex(node_values, orca.get_table('parcels').node_id)
def func():
    """
    Per ``geog`` location, the number of agents whose ``target_variable``
    equals ``target_value`` divided by the location's ``total_<agent>``
    column; missing ratios are filled with 0.

    ``agent``, ``geog``, ``geog_id``, ``target_variable`` and
    ``target_value`` are read from the enclosing scope, which is not
    visible here.
    """
    agents = orca.get_table(agent).to_frame(
        columns=[target_variable, geog_id])
    locations = orca.get_table(geog)
    print('Calculating proportion {} {} for {}'.format(
        target_variable, target_value, geog))

    matching = agents[agents[target_variable] == target_value]
    matching_count = matching.groupby(geog_id).size()

    # the * 1.0 forces float division
    proportion = matching_count * 1.0 / locations['total_' + agent]
    return proportion.fillna(0)
def create_non_mandatory_tours_table():
    """
    Build the "non_mandatory_tours" table from each person's
    ``non_mandatory_tour_frequency`` and register it with orca, with
    tracing, and with the random number generator (channel 'tours').
    """
    tour_frequency = \
        orca.get_table('persons').non_mandatory_tour_frequency.dropna()
    alts = orca.get_table('non_mandatory_tour_frequency_alts').local

    tours = process_non_mandatory_tours(tour_frequency, alts)

    orca.add_table("non_mandatory_tours", tours)
    tracing.register_traceable_table('non_mandatory_tours', tours)
    pipeline.get_rn_generator().add_channel(tours, 'tours')
def TOD_choice_simulate():
    """
    Generate time of day period choices for the synthetic population
    home-work and work-home trips.

    Registers the observations as the orca table 'tripsA', runs the
    'TOD_choice' step, and re-registers 'persons' with the resulting 'TOD'
    column joined on the index.
    """
    trips = orca.merge_tables('persons', ['persons', 'households', 'jobs'])
    trips.dropna(inplace=True)

    skims = pd.read_csv('./data/skims_110118.csv')

    # home -> work skims first, then work -> home; the suffixes separate
    # the two sets of skim columns
    trips = pd.merge(
        trips, skims, how='left',
        left_on=['zone_id_home', 'zone_id_work'],
        right_on=['orig', 'dest'])
    trips = pd.merge(
        trips, skims, how='left',
        left_on=['zone_id_work', 'zone_id_home'],
        right_on=['orig', 'dest'],
        suffixes=('_HW', '_WH'))

    # round-trip drive time for every (outbound, return) period pair
    periods = ['EA', 'AM', 'MD', 'PM', 'EV']
    for outbound in periods:
        for inbound in periods:
            outbound_time = trips[f'da_Time_{outbound}_HW']
            inbound_time = trips[f'da_Time_{inbound}_WH']
            trips[f'da_Time_{outbound}_{inbound}'] = \
                outbound_time + inbound_time

    # TOD_obs['TOD'] = None

    step = mm.get_step('TOD_choice')

    @orca.table(cache=True)
    def tripsA():
        return trips

    step.run()

    results = orca.get_table('tripsA').to_frame()
    persons = orca.get_table('persons').to_frame()
    persons = pd.merge(
        persons, results[['TOD']],
        how='left', left_index=True, right_index=True)
    orca.add_table('persons', persons)
def get_table(table_name, checkpoint_name=None):
    """
    Return pandas dataframe corresponding to table_name

    if checkpoint_name is None, return the current (most recent) version of
    the table. The table can be a checkpointed table or any registered orca
    table (e.g. function table)

    if checkpoint_name is specified, return table as it was at that
    checkpoint (the most recently checkpointed version of the table at or
    before checkpoint_name)

    Parameters
    ----------
    table_name : str
    checkpoint_name : str or None

    Returns
    -------
    df : pandas.DataFrame

    Raises
    ------
    RuntimeError
        If a checkpoint is requested for a non-checkpointed table, the
        table is not among the checkpointed tables, or the checkpoint does
        not exist.
    """
    # orca table not in checkpoints (e.g. a merged table)
    if table_name not in _PIPELINE.last_checkpoint \
            and orca.is_table(table_name):
        if checkpoint_name is not None:
            # the two literals are concatenated, so the first needs its
            # trailing space ("supported for", not "supportedfor")
            raise RuntimeError(
                "get_table: checkpoint_name ('%s') not supported "
                "for non-checkpointed table '%s'"
                % (checkpoint_name, table_name))

        return orca.get_table(table_name).to_frame()

    # was table ever checkpointed?
    if table_name not in checkpointed_tables():
        raise RuntimeError(
            "table '%s' not in checkpointed tables." % table_name)

    # if they want current version of table, no need to read from pipeline
    # store
    if checkpoint_name is None \
            or _PIPELINE.last_checkpoint[table_name] == checkpoint_name:
        # return orca.get_table(table_name).local
        return orca.get_table(table_name).to_frame()

    known_checkpoints = [
        checkpoint[CHECKPOINT_NAME] for checkpoint in _PIPELINE.checkpoints
    ]
    if checkpoint_name not in known_checkpoints:
        raise RuntimeError(
            "checkpoint '%s' not in checkpoints." % checkpoint_name)

    return read_df(table_name, checkpoint_name)
def parcel_is_allowed(form):
    """
    Per-parcel flag: True when any building type mapped to ``form`` has a
    value > 0 in ``zoning_baseline``; parcels without zoning get False.

    A disabled variant additionally allowed residential on parcels with a
    non-null ``pda``.
    """
    settings = orca.get_injectable('settings')
    btypes = settings["form_to_btype"][form]
    zoning = orca.get_table('zoning_baseline')

    # we have zoning by building type but want
    # to know if specific forms are allowed
    type_flags = []
    for typ in btypes:
        type_flags.append(zoning['type%d' % typ] > 0)

    any_allowed = pd.concat(type_flags, axis=1).max(axis=1)
    parcel_index = orca.get_table('parcels').index
    return any_allowed.reindex(parcel_index).fillna(False)
def parcel_sales_price_sqft(use):
    """
    Sales price per sqft by parcel for ``use``.

    Starts from ``parcel_average_price(use)``.  Residential prices are
    multiplied by 1.25, and additionally by ``0.1 / vacancy_rate`` when the
    vacancy rate (1 - households / residential units) is below 0.1.
    """
    s = parcel_average_price(use)
    if use == "residential":
        resunits = orca.get_table('buildings').residential_units.sum() * 1.0
        hh = len(orca.get_table('households'))
        vacancy_rate = 1.0 - (hh / resunits)
        # NOTE(review): a vacancy rate <= 0 (households >= units) yields an
        # infinite or negative factor here - confirm inputs rule that out.
        scaling_factor = .1 / vacancy_rate
        base_price_factor = 1.25
        if vacancy_rate < .1:
            price_adjustment_factor = base_price_factor * scaling_factor
        else:
            price_adjustment_factor = base_price_factor
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3
        print('Residential price adjustment factor is: %s'
              % price_adjustment_factor)
        s *= price_adjustment_factor
    return s
def hlcm_owner_simulate(households, residential_units, aggregations,
                        settings, hlcm_owner_config):
    """
    Simulate the owner household location choice and write the chosen unit
    ids back through ``update_unit_ids``.

    Note that the submarket id (zone_id) needs to be in the table of
    alternatives, for supply/demand equilibration, and needs to NOT be in
    the choosers table, to avoid conflicting when the tables are joined.
    """
    tenure = 'own'

    # Pre-filter the alternatives to avoid over-pruning (PR 103)
    correct_alternative_filters_sample(residential_units, households, tenure)

    owner_households = orca.get_table('own_hh')
    owner_units = orca.get_table('own_units')
    hlcm_simulate(owner_households, owner_units,
                  aggregations, settings, hlcm_owner_config,
                  'price_equilibration')

    update_unit_ids(households, tenure)
def parcel_sales_price_sqft(use):
    """
    Sales price per sqft by parcel for ``use``.

    Starts from ``parcel_average_price(use)``.  Residential prices are
    multiplied by 1.25, and additionally by ``0.1 / vacancy_rate`` when the
    vacancy rate (1 - households / residential units) is below 0.1.
    """
    s = parcel_average_price(use)
    if use == "residential":
        resunits = orca.get_table('buildings').residential_units.sum() * 1.0
        hh = len(orca.get_table('households'))
        vacancy_rate = 1.0 - (hh / resunits)
        # NOTE(review): a vacancy rate <= 0 (households >= units) yields an
        # infinite or negative factor here - confirm inputs rule that out.
        scaling_factor = .1 / vacancy_rate
        base_price_factor = 1.25
        if vacancy_rate < .1:
            price_adjustment_factor = base_price_factor * scaling_factor
        else:
            price_adjustment_factor = base_price_factor
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3
        print('Residential price adjustment factor is: %s'
              % price_adjustment_factor)
        s *= price_adjustment_factor
    return s
def add_extra_columns(df):
    """
    Fill in columns that later steps expect on a buildings frame.

    ``residential_price``, ``non_residential_price`` and
    ``redfin_sale_year`` are always overwritten (0, 0 and 2012).  The other
    columns are only added when missing: ``deed_restricted_units`` and
    ``residential_units`` default to 0, ``parcel_size`` is looked up from
    the parcels table, ``year_built`` comes from the "year" injectable and
    ``building_type_id`` from the "form_to_btype_func" injectable, when
    those injectables are registered.

    The frame is modified in place and also returned.
    """
    for col in ["residential_price", "non_residential_price"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # print() with a single pre-formatted string behaves the same on
        # Python 2 and Python 3
        print("Number of deed restricted units built = %d"
              % df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): the looked-up Series is labelled by parcel_id, so
        # the assignment aligns on df's index - confirm df is indexed that
        # way.
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if "year" in orca.orca._INJECTABLES and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if "form_to_btype_func" in orca.orca._INJECTABLES and \
            "building_type_id" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type_id"] = df.apply(form_to_btype_func, axis=1)

    return df
def func(net):
    """
    Aggregate an agent variable over the network and return it per node.

    The agents are optionally restricted to
    ``target_variable == target_value`` and to ``filters``, set on ``net``
    by their ``node_id``, aggregated within ``radius`` and, when ``log`` is
    set, transformed with log1p.

    ``name``, ``agent``, ``target_variable``, ``target_value``,
    ``filters``, ``radius``, ``agg_function``, ``decay`` and ``log`` are
    read from the enclosing scope, which is not visible here.
    """
    print('Calculating {}'.format(name))

    nodes = pd.DataFrame(index=net.node_ids)

    # build the equality filter once; it is needed both to collect the
    # columns to load and to filter the rows
    value_filter = None
    if target_value:
        value_filter = ["{} == {}".format(target_variable, target_value)]

    flds = [target_variable] if target_variable else []
    if value_filter:
        flds += util.columns_in_filters(value_filter)
    if filters:
        flds += util.columns_in_filters(filters)
    flds.append('node_id')

    df = orca.get_table(agent).to_frame(flds)

    if value_filter:
        df = util.apply_filter_query(df, value_filter)
    if filters:
        df = util.apply_filter_query(df, filters)

    net.set(df['node_id'],
            variable=df[target_variable] if target_variable else None)
    nodes[name] = net.aggregate(radius, type=agg_function, decay=decay)

    if log:
        # reference the function directly; eval() of a constant string
        # added nothing
        nodes[name] = nodes[name].apply(np.log1p)
    return nodes[name]
def effective_max_dua(zoning_baseline, parcels, scenario):
    """
    Effective maximum dwelling units per acre, by parcel.

    The baseline is the smallest of the zoned ``max_dua`` and the values
    implied by ``max_far`` and ``max_height``.  It is raised to
    ``zoning_scenario.dua_up`` where that is larger, then zeroed on parcels
    where residential is not allowed.

    ``scenario`` is accepted but not used here.
    """
    dua_from_far = zoning_baseline.max_far * 43560 / GROSS_AVE_UNIT_SIZE

    far_from_height = \
        (zoning_baseline.max_height / HEIGHT_PER_STORY) * \
        PARCEL_USE_EFFICIENCY
    dua_from_height = far_from_height * 43560 / GROSS_AVE_UNIT_SIZE

    # the most restrictive of the three limits applies
    limits = [zoning_baseline.max_dua, dua_from_far, dua_from_height]
    dua = pd.concat(limits, axis=1).min(axis=1)

    # scenario upzoning can only raise the limit
    dua_up = orca.get_table("zoning_scenario").dua_up
    dua = pd.concat([dua, dua_up], axis=1).max(axis=1)

    residential_ok = parcel_is_allowed('residential')

    masked = dua.fillna(0) * residential_ok
    return masked.reindex(parcels.index).fillna(0).astype('float')
def alter_multiplier(mult_val):
    """
    Set ``emp_multiplier`` for row 2453 of the multipliers table to
    ``mult_val``, write the multipliers to CSV, and append the multipliers
    together with the simulated employment to the ``calib_1851`` SQL table.
    """
    from sqlalchemy import engine as sa_engine
    db = sa_engine.create_engine(
        'postgresql://*****:*****@localhost:5432/postgres', echo=False)

    # test with zone 1851 DIA
    multipliers = orca.get_table('multipliers').to_frame()
    multipliers.loc[2453, 'emp_multiplier'] = mult_val
    multipliers.to_csv(
        'c:/urbansim_new/urbansim/urbansim_drcog/config/new_multipliers.csv')
    # orca.get_table('multipliers').update_col_from_series(
    #     'emp_multiplier', mult.emp_multiplier)

    zone_summary = orca.get_table('zone_summary').to_frame()

    calib = pd.DataFrame(index=multipliers.index)
    calib.loc[:, "emp_multiplier"] = multipliers.emp_multiplier
    calib.loc[:, "emp_sim"] = zone_summary.emp_sim

    calib.to_sql('calib_1851', db, if_exists='append')
def test_table_preview(tapp):
    """
    The preview endpoint for table 'dfa' answers 200 and its body equals
    the table's frame serialized as "split"-oriented JSON.
    """
    response = tapp.get('/tables/dfa/preview')
    assert response.status_code == 200

    body = response.data.decode('utf-8')
    expected = orca.get_table('dfa').to_frame().to_json(orient='split')
    assert body == expected
def assert_column_can_be_generated(table_name, column_name):
    """
    Assert that a registered column can actually be evaluated.

    There are four types of columns: (1) local columns of a registered table,
    (2) the index of a registered table, (3) SeriesWrapper columns associated
    with a table, and (4) ColumnFuncWrapper columns associated with a table.

    Only the ColumnFuncWrapper columns need to be tested here, because the
    others already exist at the point when they're registered.

    Parameters
    ----------
    table_name : str
    column_name : str

    Returns
    -------
    None

    Raises
    ------
    OrcaAssertionError
        If the column is a function column and evaluating it raises an
        ``Exception``.  Whatever ``assert_column_is_registered`` raises is
        propagated unchanged.

    """
    assert_column_is_registered(table_name, column_name)

    t = orca.get_table(table_name)

    # t.column_type() fails for index columns, so we have to check for them
    # separately
    if column_name in t.index.names:
        return

    if t.column_type(column_name) == 'function':
        try:
            # This seems to be the only way to trigger evaluation
            t.get_column(column_name)
        except Exception:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not reported as a column failure.
            # TODO: issues #3 log backtrace
            msg = ("Column '%s' is registered but cannot be generated"
                   % column_name)
            raise OrcaAssertionError(msg)
    return
def table_csv(table_name):
    """
    Return a table as text/csv using pandas' default CSV output.

    The result is a ``(body, status, headers)`` tuple with status 200.

    """
    frame = orca.get_table(table_name).to_frame()
    headers = {"Content-Type": "text/csv"}
    return frame.to_csv(), 200, headers
def development_projects(parcels, settings):
    """
    Load and clean the development projects CSV.

    Reads ``development_projects.csv`` from ``misc.data_dir()``, fills in
    the columns listed below, drops rows without ``geom_id`` or
    ``year_built``, drops rows whose ``residential_units`` is the string
    "rent", maps geom ids to parcel ids via ``geom_id_to_parcel_id`` and
    keeps only rows with ``building_type_id <= 4`` or ``>= 7``.

    Parameters
    ----------
    parcels : object
        Passed through to ``geom_id_to_parcel_id``.
    settings : dict-like
        Must contain the ``"building_type_map2"`` mapping.

    Returns
    -------
    pandas.DataFrame
        The cleaned projects.  A ``describe()`` of the buildings-table local
        columns is printed as a side effect.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(), "development_projects.csv"))

    for fld in ['residential_sqft', 'residential_price',
                'non_residential_price']:
        df[fld] = 0
    df["redfin_sale_year"] = 2012

    # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)
    df["building_type_id"] = df.building_type.map(
        settings["building_type_map2"])

    df = df.dropna(subset=["geom_id"])  # need a geom_id to link to parcel_id
    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.astype("int")
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    # Single-argument print() calls behave the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
def local_pois(settings):
    """
    Precompute nearest-POI distances on the walk network and write them to
    ``local_poi_distances.csv`` (index named ``node_id``).

    Two columns are produced: ``bartdist`` (from ``bart_stations.csv``) and
    ``pacheights`` (from the 'landmarks' table row named 'pacheights').
    Nothing is returned.
    """
    # because of the aforementioned limit of one network at a time for the
    # POIS, as well as the large amount of memory used, this is now a
    # preprocessing step
    walk_name = settings['build_networks']['walk']['name']
    network = make_network(walk_name, "weight", 3000)

    network.init_pois(num_categories=1, max_dist=3000, max_pois=1)

    # BART stations: the single "tmp" category is reused for each POI set
    stations = pd.read_csv(
        os.path.join(misc.data_dir(), 'bart_stations.csv'))
    network.set_pois("tmp", stations.lng, stations.lat)
    bart_dist = network.nearest_pois(3000, "tmp", num_pois=1)[1]

    # a single named landmark
    landmark_name = 'pacheights'
    landmarks = orca.get_table('landmarks').local
    landmark = landmarks.query("name == '%s'" % landmark_name)
    network.set_pois("tmp", landmark.lng, landmark.lat)
    landmark_dist = network.nearest_pois(3000, "tmp", num_pois=1)[1]

    distances = pd.DataFrame({
        "bartdist": bart_dist,
        "pacheights": landmark_dist,
    })
    distances.index.name = "node_id"
    distances.to_csv('local_poi_distances.csv')
def test_expression(orca_session):
    """
    Verify that running the template registers column 'c' on table 'obs'
    and that its values equal the expression computed directly with numpy.

    """
    step = ColumnFromExpression()
    step.data.table = 'obs'
    step.data.expression = 'a * 5 + sqrt(b)'
    step.output.column_name = 'c'

    step.run()

    generated = orca.get_table('obs').get_column('c')

    obs = orca.get_table('obs').to_frame()
    expected = obs.a * 5 + np.sqrt(obs.b)

    assert generated.equals(expected)
def overlay_costs(site_proposals):
    """
    Return ``site_proposals`` with an ``overlay_cost`` column added.

    The cost is taken from the 'zone_overlay_types' table and depends on the
    row's ``status_ugb`` (location in relation to the ugb).  Rows whose
    ``status_ugb`` matches none of the three handled values keep a missing
    ``overlay_cost``.

    Note that the frame passed in is modified in place before the merge:
    null ``overlay_id`` values are set to '-1' and the column is cast to int.
    """
    wanted = [
        'overlay_id',
        'annexed_overlay_id',
        'overlay_combination',
        'cost_in_city',
        'cost_in_ugb',
        'cost_outside_ugb',
    ]
    # suffix the cost columns so they can be dropped again after the lookup
    suffixed = {
        'cost_in_city': 'cost_in_city_',
        'cost_in_ugb': 'cost_in_ugb_',
        'cost_outside_ugb': 'cost_outside_ugb_',
    }
    overlay_types = orca.get_table('zone_overlay_types').to_frame()
    overlay_types = overlay_types[wanted].copy().rename(columns=suffixed)

    no_overlay = site_proposals.overlay_id.isnull()
    site_proposals.loc[no_overlay, 'overlay_id'] = '-1'
    site_proposals['overlay_id'] = \
        site_proposals['overlay_id'].astype(float).astype(int)

    merged = pd.merge(
        site_proposals, overlay_types, on='overlay_id', how='left')

    # which cost column applies for each ugb status
    cost_by_status = [
        ('within_city', 'cost_in_city_'),
        ('within_ugb', 'cost_in_ugb_'),
        ('outside_ugb', 'cost_outside_ugb_'),
    ]
    for status, cost_col in cost_by_status:
        in_status = merged['status_ugb'] == status
        merged.loc[in_status, 'overlay_cost'] = merged[cost_col]

    return merged.drop(
        columns=[cost_col for _, cost_col in cost_by_status])
def price_vars(net):
    """
    Compute the network variables defined in ``price_vars.yaml`` on the walk
    network and join them onto the registered 'nodes' table.

    Missing values in the computed variables are replaced with 0, a
    ``describe()`` summary is printed, and the joined frame is re-registered
    as the 'nodes' table.  Nothing is returned.

    Parameters
    ----------
    net : dict-like
        Must provide the network under the key ``"walk"``.
    """
    nodes2 = networks.from_yaml(net["walk"], "price_vars.yaml")
    nodes2 = nodes2.fillna(0)

    # A single-argument print() call behaves the same on Python 2 and 3,
    # whereas the print statement is a syntax error on Python 3.
    print(nodes2.describe())

    nodes = orca.get_table('nodes')
    nodes = nodes.to_frame().join(nodes2)
    orca.add_table("nodes", nodes)
def parcel_is_allowed_func(form):
    """
    Return a boolean Series, indexed like the 'parcels' table, telling
    whether ``form`` is allowed on each parcel.

    A parcel's counter is incremented once per used building type when the
    parcel has a 'parcel_zoning' row whose ``constraint_type`` equals the
    form's density type and whose ``generic_land_use_type_id`` equals the
    form's land-use type.  The parcel is allowed when its counter equals the
    number of used building types.

    Parameters
    ----------
    form : hashable
        Key into the ``forms``, ``form_glut`` and ``form_density_type``
        mappings of the "pf_config" injectable.
    """
    config = orca.get_injectable("pf_config")
    bt_distr = config.forms[form]
    glu = config.form_glut[form]
    zoning = orca.get_table('parcel_zoning')
    btused = config.residential_uses.index[bt_distr > 0]
    is_res_bt = config.residential_uses[btused]
    unit = config.form_density_type[form]
    parcels = orca.get_table('parcels')
    result = pd.Series(0, index=parcels.index)

    # The zoning selection does not depend on the building type being
    # iterated, so compute it once instead of once per loop iteration.
    matches = np.logical_and(
        zoning.index.get_level_values("constraint_type") == unit,
        zoning.index.get_level_values("generic_land_use_type_id") == glu)
    pcls = zoning.local.loc[matches].index.get_level_values("parcel_id")

    # One increment per used building type, as before.
    for _ in is_res_bt.index:
        result[pcls] = result[pcls] + 1

    return (result == is_res_bt.index.size)
def column_definition(table_name, col_name):
    """
    Describe how a column is defined, as a JSON response.

    For a column that is not backed by a registered function the JSON
    contains only "type" (the column type reported by Orca).

    For a function column the JSON has the keys "type", "filename",
    "lineno", "text", and "html".  "text" is the raw source of the function
    and "html" is the same source marked up by Pygments.

    """
    table = orca.get_table(table_name)
    kind = table.column_type(col_name)

    if kind == 'function':
        raw_column = orca.get_raw_column(table_name, col_name)
        filename, lineno, source = raw_column.func_source_data()
        marked_up = highlight(source, PythonLexer(), HtmlFormatter())
        return jsonify(
            type='function',
            filename=filename,
            lineno=lineno,
            text=source,
            html=marked_up)

    return jsonify(type=kind)
def column_csv(table_name, col_name):
    """
    Return a column as CSV using Pandas' default CSV output.

    The result is a ``(body, status, headers)`` tuple with status 200 and a
    ``text/csv`` content type.

    """
    column = orca.get_table(table_name).get_column(col_name)
    # The ``path=`` keyword was deprecated in pandas 0.24 and removed in 1.0.
    # Calling to_csv() with no target returns the CSV text on old and new
    # pandas alike.
    csv = column.to_csv()
    return csv, 200, {"Content-Type": "text/csv"}
def add_extra_columns_func(df):
    """
    Add the extra columns expected on newly built buildings.

    ``df`` is modified in place and also returned.  The price columns and
    the redfin columns are always overwritten; the remaining columns are
    only added when they are not already present.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``parcel_id`` when ``parcel_size`` is absent.

    Returns
    -------
    pandas.DataFrame
        The same frame object, with the extra columns set.
    """
    for col in ["residential_price", "non_residential_price"]:
        df[col] = 0

    if "deed_restricted_units" not in df.columns:
        df["deed_restricted_units"] = 0
    else:
        # A single-argument print() call behaves the same on Python 2 and 3,
        # whereas the print statement is a syntax error on Python 3.
        print("Number of deed restricted units built = %d" %
              df.deed_restricted_units.sum())

    df["redfin_sale_year"] = 2012
    df["redfin_sale_price"] = np.nan

    if "residential_units" not in df:
        df["residential_units"] = 0

    if "parcel_size" not in df:
        # NOTE(review): the looked-up Series is labelled by parcel id, and
        # assignment aligns on df's index - confirm df is indexed compatibly.
        df["parcel_size"] = \
            orca.get_table("parcels").parcel_size.loc[df.parcel_id]

    if "year" in orca.orca._INJECTABLES and "year_built" not in df:
        df["year_built"] = orca.get_injectable("year")

    if "form_to_btype_func" in orca.orca._INJECTABLES and \
            "building_type" not in df:
        form_to_btype_func = orca.get_injectable("form_to_btype_func")
        df["building_type"] = df.apply(form_to_btype_func, axis=1)

    return df
def add_extra_columns(df, new_cols=None):
    """
    Make ``df`` carry every local column of the 'buildings' table.

    ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to extend.
    new_cols : dict, optional
        Values keyed by column name.  A buildings column found here is set
        (or overwritten) with the given value.  Buildings columns that are
        neither in ``new_cols`` nor already in ``df`` are created with 0.

    Returns
    -------
    pandas.DataFrame
        The same frame object that was passed in.
    """
    # Avoid a shared mutable default argument.
    if new_cols is None:
        new_cols = {}

    bldgs = orca.get_table('buildings')
    for col in bldgs.local_columns:
        if col in new_cols:
            df[col] = new_cols[col]
        elif col not in df.columns:
            df[col] = 0
    return df
def column_describe(table_name, col_name):
    """
    Return summary statistics of a column as JSON.

    The statistics come from ``describe()`` and are serialized with Pandas'
    "split" JSON format.  The result is a ``(body, status, headers)`` tuple
    with status 200.

    """
    column = orca.get_table(table_name).get_column(col_name)
    summary = column.describe()
    headers = {"Content-Type": "application/json"}
    return summary.to_json(orient="split"), 200, headers
def table_describe(table_name):
    """
    Return summary statistics of a table as JSON.

    Covers the columns of the frame returned by ``to_frame()``, summarized
    with ``describe()``.  Uses Pandas' "split" JSON format with ISO date
    formatting.  The result is a ``(body, status, headers)`` tuple with
    status 200.

    """
    frame = orca.get_table(table_name).to_frame()
    summary = frame.describe()
    body = summary.to_json(orient="split", date_format="iso")
    headers = {"Content-Type": "application/json"}
    return body, 200, headers