def hlcm_simulate(households, buildings, aggregations, settings, low_income):
    """
    Run the household location choice model in two passes.

    The first pass writes a temporary copy of ``hlcm.yaml`` whose
    ``choosers_predict_filters`` is ``"income <= <low_income>"`` and
    simulates against ``vacant_affordable_units``.  The second pass runs the
    unmodified ``hlcm.yaml`` against ``vacant_market_rate_units``.

    Parameters
    ----------
    households, buildings, aggregations :
        Passed unchanged to ``utils.lcm_simulate`` as the choosers, the
        buildings and the join tables respectively.
    settings : dict-like
        Only ``settings.get("enable_supply_correction", None)`` is read; the
        value is forwarded to ``utils.lcm_simulate``.
    low_income : number
        Income threshold, formatted with ``%d`` into the chooser filter.
    """
    tmp_cfg_name = "hlcm_tmp.yaml"
    base_cfg_path = misc.config("hlcm.yaml")
    tmp_cfg_path = misc.config(tmp_cfg_name)
    supply_correction = settings.get("enable_supply_correction", None)

    print("\nAffordable housing HLCM:\n")

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only while hlcm.yaml is a trusted file.
    with open(base_cfg_path) as src:
        cfg = yaml.load(src)
    cfg["choosers_predict_filters"] = "income <= %d" % low_income
    with open(tmp_cfg_path, "w") as dst:
        dst.write(yaml.dump(cfg))

    try:
        # low income into affordable units
        utils.lcm_simulate(tmp_cfg_name, households, buildings, aggregations,
                           "building_id", "residential_units",
                           "vacant_affordable_units", supply_correction)
    finally:
        # always clean up the temporary config, even if the simulation fails
        os.remove(tmp_cfg_path)

    print("\nMarket rate housing HLCM:\n")

    # then everyone into market rate units
    utils.lcm_simulate("hlcm.yaml", households, buildings, aggregations,
                       "building_id", "residential_units",
                       "vacant_market_rate_units", supply_correction)
def drop_tenure_predict_filters_from_yaml(in_yaml_name, out_yaml_name):
    """
    Copy a yaml config while replacing its ``alts_predict_filters`` entry.

    If ``in_yaml_name`` contains the substring ``'lowincome'`` the filter is
    set to ``None``; otherwise it is set to ``'deed_restricted == False'``.

    Parameters
    ----------
    in_yaml_name : string
        Name of the config file to read (resolved through ``misc.config``).
    out_yaml_name : string
        Name of the config file to write (resolved through ``misc.config``).
    """
    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(misc.config(in_yaml_name)) as src:
        cfg = yaml.load(src)

    if 'lowincome' in in_yaml_name:
        cfg["alts_predict_filters"] = None
    else:
        cfg["alts_predict_filters"] = 'deed_restricted == False'

    with open(misc.config(out_yaml_name), "w") as dst:
        dst.write(yaml.dump(cfg))
def lcm_estimate(cfg, choosers, chosen_fname, buildings, join_tbls,
                 out_cfg=None):
    """
    Fit a location choice model for the given choosers.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file that defines the location choice model.
    choosers : DataFrameWrapper
        The agents doing the choosing.
    chosen_fname : str
        Column of ``choosers`` holding the id of the chosen alternative; it
        is requested from ``to_frame`` as an additional column.
    buildings : DataFrameWrapper
        The buildings the choosers locate in.
    join_tbls : list of strings
        Land use tables to join to the buildings.
    out_cfg : string, optional
        Name of the yaml config file to receive the estimation results.
        When omitted, ``outcfgname=None`` is passed on to ``fit_from_cfg``.

    Returns
    -------
    Whatever ``fit_from_cfg`` of the class selected by ``yaml_to_class``
    returns.
    """
    config_path = misc.config(cfg)

    chooser_frame = to_frame(choosers, [], config_path,
                             additional_columns=[chosen_fname])
    alternative_frame = to_frame(buildings, join_tbls, config_path)

    output_path = misc.config(out_cfg) if out_cfg is not None else None

    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(chooser_frame,
                                  chosen_fname,
                                  alternative_frame,
                                  config_path,
                                  outcfgname=output_path)
def hlcm_simulate(households, buildings, aggregations, settings, low_income):
    """
    Run the household location choice model in two passes.

    Pass one restricts choosers with ``"income <= <low_income>"`` (written to
    a temporary copy of ``hlcm.yaml``) and simulates against
    ``vacant_affordable_units``.  Pass two runs the unmodified ``hlcm.yaml``
    against ``vacant_market_rate_units_minus_structural_vacancy``.

    Parameters
    ----------
    households, buildings, aggregations :
        Passed unchanged to ``utils.lcm_simulate`` as the choosers, the
        buildings and the join tables respectively.
    settings : dict-like
        Only ``settings.get("enable_supply_correction", None)`` is read; the
        value is forwarded to ``utils.lcm_simulate``.
    low_income : number
        Income threshold, formatted with ``%d`` into the chooser filter.
    """
    tmp_name = "hlcm_tmp.yaml"
    tmp_path = misc.config(tmp_name)
    supply_correction = settings.get("enable_supply_correction", None)

    print("\nAffordable housing HLCM:\n")

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only while hlcm.yaml is a trusted file.
    with open(misc.config("hlcm.yaml")) as src:
        cfg = yaml.load(src)
    cfg["choosers_predict_filters"] = "income <= %d" % low_income
    with open(tmp_path, "w") as dst:
        dst.write(yaml.dump(cfg))

    try:
        # low income into affordable units
        utils.lcm_simulate(tmp_name, households, buildings, aggregations,
                           "building_id", "residential_units",
                           "vacant_affordable_units", supply_correction)
    finally:
        # the temporary config must not outlive this call, success or not
        os.remove(tmp_path)

    print("\nMarket rate housing HLCM:\n")

    # then everyone into market rate units
    utils.lcm_simulate("hlcm.yaml", households, buildings, aggregations,
                       "building_id", "residential_units",
                       "vacant_market_rate_units_minus_structural_vacancy",
                       supply_correction)
def work_at_home_estimate(cfg, choosers, chosen_fname, join_tbls,
                          out_cfg=None):
    """
    Fit the binary work-at-home choice model.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the model.
    choosers : DataFrameWrapper
        The agents making the choice.
    chosen_fname : str
        Passed to ``BinaryDiscreteChoiceModel.fit_from_cfg`` as the name of
        the chosen column.
    join_tbls : list of strings
        Tables joined to the choosers by ``to_frame``.
    out_cfg : string, optional
        Name of the yaml config file to receive the estimation results.
        When omitted, ``outcfgname=None`` is passed on.

    Returns
    -------
    The result of ``BinaryDiscreteChoiceModel.fit_from_cfg``.
    """
    config_path = misc.config(cfg)

    # NOTE(review): the extra column requested is the literal 'work_at_home',
    # not chosen_fname -- confirm callers always pass that same name.
    chooser_frame = to_frame(choosers, join_tbls, config_path,
                             additional_columns=['work_at_home'])

    output_path = misc.config(out_cfg) if out_cfg is not None else None

    return BinaryDiscreteChoiceModel.fit_from_cfg(chooser_frame,
                                                  chosen_fname,
                                                  config_path,
                                                  outcfgname=output_path)
def lcm_estimate(choosers, chosen_fname, alternatives, cfgname):
    """
    Estimate a location choice model and write the fitted model back to its
    yaml config file.

    Parameters
    ----------
    choosers : DataFrame
        A dataframe of rows of agents which have locations assigned.
    chosen_fname : string
        A string indicating the column in the choosers dataframe which
        gives which location the choosers have chosen.
    alternatives : DataFrame
        A dataframe of locations which should include the chosen locations
        from the choosers dataframe as well as some other locations from
        which to sample.  Values in choosers[chosen_fname] should index
        into the alternatives dataframe.
    cfgname : string
        The name of the yaml config file from which to read the location
        choice model.  The fitted model is written back to the same file.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is neither ``"locationchoice"`` nor
        ``"segmented_locationchoice"``.
    """
    print("Running location choice model estimation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "locationchoice":
        lcm = MNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
        lcm.fit(choosers, alternatives, choosers[chosen_fname])
        lcm.report_fit()
    elif model_type == "segmented_locationchoice":
        lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
        lcm.fit(choosers, alternatives, choosers[chosen_fname])
        for k, v in lcm._group.models.items():
            print("LCM RESULTS FOR SEGMENT %s\n" % str(k))
            v.report_fit()
            print("")
    else:
        # previously this fell through to a NameError on the unbound `lcm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    lcm.to_yaml(str_or_buffer=cfg)
def hedonic_estimate(df, cfgname):
    """
    Estimate a hedonic model, write it back to its yaml config file and
    return the estimation data.

    Parameters
    ----------
    df : DataFrame
        The dataframe which contains the columns to use for the estimation.
    cfgname : string
        The name of the yaml config file which describes the hedonic model.
        The fitted model is written back to the same file.

    Returns
    -------
    est_data : dict
        For ``"regression"``: ``{"est_data": hm.est_data}``.  For
        ``"segmented_regression"``: a dict keyed by segment name with each
        segment model's ``est_data``.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is not one of the two supported
        values.
    """
    print("Running hedonic estimation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "regression":
        hm = RegressionModel.from_yaml(str_or_buffer=cfg)
        print(hm.fit(df, debug=True).summary())
        est_data = {"est_data": hm.est_data}
    elif model_type == "segmented_regression":
        hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
        hm.min_segment_size = 10
        for k, v in hm.fit(df, debug=True).items():
            print("REGRESSION RESULTS FOR SEGMENT %s\n" % str(k))
            print(v.summary())
            print("")
        est_data = {name: hm._group.models[name].est_data
                    for name in hm._group.models}
    else:
        # previously this fell through to a NameError on the unbound `hm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    hm.to_yaml(str_or_buffer=cfg)
    return est_data
def hedonic_estimate(df, cfgname):
    """
    Estimate a hedonic model, write it back to its yaml config file and
    return the estimation data.

    Parameters
    ----------
    df : DataFrame
        The dataframe which contains the columns to use for the estimation.
    cfgname : string
        The name of the yaml config file which describes the hedonic model.
        The fitted model is written back to the same file.

    Returns
    -------
    est_data
        For ``"regression"``: ``hm.est_data`` itself (not wrapped in a
        dict).  For ``"segmented_regression"``: a dict keyed by segment name
        with each segment model's ``est_data``.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is not one of the two supported
        values.
    """
    print("Running hedonic estimation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "regression":
        hm = RegressionModel.from_yaml(str_or_buffer=cfg)
        # NOTE(review): fit is called without debug=True here, unlike the
        # segmented branch -- confirm est_data is populated in that case.
        print(hm.fit(df).summary())
        est_data = hm.est_data
    elif model_type == "segmented_regression":
        hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
        hm.min_segment_size = 10
        for k, v in hm.fit(df, debug=True).items():
            print("REGRESSION RESULTS FOR SEGMENT %s\n" % str(k))
            print(v.summary())
            print("")
        est_data = {
            name: hm._group.models[name].est_data
            for name in hm._group.models
        }
    else:
        # previously this fell through to a NameError on the unbound `hm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    hm.to_yaml(str_or_buffer=cfg)
    return est_data
def lcm_estimate(cfg, choosers, chosen_fname, buildings, join_tbls):
    """
    Fit a location choice model for the given choosers.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file that defines the location choice model.
    choosers : DataFrameWrapper
        The agents doing the choosing.
    chosen_fname : str
        Column of ``choosers`` holding the id of the chosen alternative; it
        is requested from ``to_frame`` as an additional column.
    buildings : DataFrameWrapper
        The buildings the choosers locate in.
    join_tbls : list of strings
        Land use tables to join to the buildings.

    Returns
    -------
    Whatever ``fit_from_cfg`` of the class selected by ``yaml_to_class``
    returns.
    """
    config_path = misc.config(cfg)

    chooser_frame = to_frame(choosers, [], config_path,
                             additional_columns=[chosen_fname])
    alternative_frame = to_frame(buildings, join_tbls, config_path)

    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(chooser_frame,
                                  chosen_fname,
                                  alternative_frame,
                                  config_path)
def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    choosers : DataFrame
        A dataframe of agents doing the choosing.  Rows whose
        ``output_fname`` value is null are treated as movers.
    locations : DataFrame
        A dataframe of locations which the choosers are locating in and
        which have a supply.
    cfgname : string
        The name of the yaml config file from which to read the location
        choice model.
    outdf : DataFrame
        The dataframe to write the simulated location to (modified in
        place).
    output_fname : string
        The column name to write the simulated location to.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is neither ``"locationchoice"`` nor
        ``"segmented_locationchoice"``.
    """
    print("Running location choice model simulation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "locationchoice":
        lcm = MNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
    elif model_type == "segmented_locationchoice":
        lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
    else:
        # previously this fell through to a NameError on the unbound `lcm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    movers = choosers[choosers[output_fname].isnull()]

    new_units = lcm.predict(movers, locations)
    print("Assigned %d choosers to new units" % len(new_units.index))

    # Single .loc call on the frame: the chained form
    # outdf[col].loc[idx] = ... may assign into a temporary copy.
    outdf.loc[new_units.index, output_fname] = \
        locations.loc[new_units.values][output_fname].values
    _print_number_unplaced(outdf, output_fname)
def lcm_estimate(cfg, choosers, chosen_fname, buildings, nodes):
    """
    Fit a location choice model for the given choosers.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file that defines the location choice model.
    choosers :
        Table of agents doing the choosing; converted with ``to_frame``.
    chosen_fname : str
        Column requested from the choosers as an additional column and
        passed to ``fit_from_cfg`` as the chosen-alternative column.
    buildings, nodes :
        Tables combined by ``to_frame`` into the alternatives.

    Returns
    -------
    Whatever ``fit_from_cfg`` of the class selected by ``yaml_to_class``
    returns.
    """
    config_path = misc.config(cfg)

    chooser_frame = to_frame([choosers], config_path,
                             additional_columns=[chosen_fname])
    alternative_frame = to_frame([buildings, nodes], config_path)

    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(chooser_frame,
                                  chosen_fname,
                                  alternative_frame,
                                  config_path)
def hedonic_simulate(cfg, tbl, nodes, out_fname):
    """
    Predict prices/rents with the hedonic model and store them on ``tbl``.

    Predictions above 1000 are capped at 1000 before being written.  If any
    prediction is NaN or infinite, a count is printed.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the hedonic model.
    tbl :
        Table to predict for; the result is written back with
        ``tbl.update_col_from_series``.
    nodes :
        Table combined with ``tbl`` by ``to_frame``.
    out_fname : string
        Name of the column of ``tbl`` that receives the prediction.
    """
    config_path = misc.config(cfg)
    frame = to_frame([tbl, nodes], config_path)
    predicted, _ = yaml_to_class(config_path).predict_from_cfg(frame,
                                                               config_path)

    # treat +/-inf like missing values when counting bad predictions
    bad_count = predicted.replace([np.inf, -np.inf], np.nan).isnull().sum()
    if bad_count > 0:
        print("Hedonic output %d nas or inf (out of %d) in column %s" %
              (bad_count, len(predicted), out_fname))

    predicted[predicted > 1000] = 1000
    tbl.update_col_from_series(out_fname, predicted)
def hedonic_estimate(cfg, tbl, join_tbls, out_cfg=None):
    """
    Fit the hedonic model for the given table.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the hedonic model.
    tbl : DataFrameWrapper
        The table to estimate the hedonic for.
    join_tbls : list of strings
        Land use tables to join to ``tbl``.
    out_cfg : string, optional
        Name of the yaml config file to receive the estimation results.
        When omitted, ``outcfgname=None`` is passed on to ``fit_from_cfg``.

    Returns
    -------
    Whatever ``fit_from_cfg`` of the class selected by ``yaml_to_class``
    returns.
    """
    config_path = misc.config(cfg)
    frame = to_frame(tbl, join_tbls, config_path)

    output_path = misc.config(out_cfg) if out_cfg is not None else None

    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(frame, config_path,
                                  outcfgname=output_path)
def lcm_simulate(choosers, locations, cfgname, outdf, output_fname):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    choosers : DataFrame
        A dataframe of agents doing the choosing.  Rows whose
        ``output_fname`` value is null are treated as movers.
    locations : DataFrame
        A dataframe of locations which the choosers are locating in and
        which have a supply.
    cfgname : string
        The name of the yaml config file from which to read the location
        choice model.
    outdf : DataFrame
        The dataframe to write the simulated location to (modified in
        place).
    output_fname : string
        The column name to write the simulated location to.

    Returns
    -------
    sim_pdf : DataFrame or None
        ``None`` when no chooser was assigned.  Otherwise the models'
        ``sim_pdf`` tables concatenated column-wise, re-indexed by
        ``locations[output_fname]`` and reduced to the first row per index
        value.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is neither ``"locationchoice"`` nor
        ``"segmented_locationchoice"``.
    """
    print("Running location choice model simulation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "locationchoice":
        lcm = MNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
    elif model_type == "segmented_locationchoice":
        lcm = SegmentedMNLLocationChoiceModel.from_yaml(str_or_buffer=cfg)
    else:
        # previously this fell through to a NameError on the unbound `lcm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    movers = choosers[choosers[output_fname].isnull()]

    new_units = lcm.predict(movers, locations, debug=True)
    print("Assigned %d choosers to new units" % len(new_units.index))
    if len(new_units) == 0:
        return

    # Single .loc call on the frame: the chained form
    # outdf[col].loc[idx] = ... may assign into a temporary copy.
    outdf.loc[new_units.index, output_fname] = \
        locations.loc[new_units.values][output_fname].values
    _print_number_unplaced(outdf, output_fname)

    if model_type == "locationchoice":
        sim_pdf = {"sim_pdf": lcm.sim_pdf}
    else:
        sim_pdf = {name: lcm._group.models[name].sim_pdf
                   for name in lcm._group.models}

    # go back to the buildings from units
    sim_pdf = pd.concat(list(sim_pdf.values()), keys=list(sim_pdf.keys()),
                        axis=1)
    sim_pdf.index = locations.loc[sim_pdf.index][output_fname].values
    sim_pdf = sim_pdf.groupby(level=0).first()

    return sim_pdf
def hedonic_estimate(cfg, tbl, join_tbls):
    """
    Fit the hedonic model for the given table.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the hedonic model.
    tbl : DataFrameWrapper
        The table to estimate the hedonic for.
    join_tbls : list of strings
        Land use tables to join to ``tbl``.

    Returns
    -------
    Whatever ``fit_from_cfg`` of the class selected by ``yaml_to_class``
    returns.
    """
    config_path = misc.config(cfg)
    frame = to_frame(tbl, join_tbls, config_path)
    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(frame, config_path)
def hedonic_simulate(cfg, tbl, join_tbls, out_fname):
    """
    Predict with the hedonic model and store the result on ``tbl``.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the hedonic model.
    tbl : DataFrameWrapper
        The table to simulate the hedonic for; the prediction is written
        back with ``tbl.update_col_from_series``.
    join_tbls : list of strings
        Land use tables to join to ``tbl``.
    out_fname : string
        Name of the column (should be present in ``tbl``) that receives the
        prediction.
    """
    config_path = misc.config(cfg)
    frame = to_frame(tbl, join_tbls, config_path)

    model_cls = yaml_to_class(config_path)
    # only the first element of the returned pair is used
    predicted, _ = model_cls.predict_from_cfg(frame, config_path)

    tbl.update_col_from_series(out_fname, predicted)
def create_lcm_from_config(config_filename, model_attributes):
    """
    Build a SimulationChoiceModel from a config file and a dictionary of
    model category attributes.

    Parameters
    ----------
    config_filename : string
        Name of the yaml config file.  The part before the first ``'.'`` is
        used as the model name.
    model_attributes : dict
        Must contain ``'supply_variable'``, ``'vacant_variable'``,
        ``'agents_name'`` and ``'alternatives_name'``.  ``'merge_tables'``,
        ``'agent_units'`` and ``'alternatives_id_name'`` are optional.

    Returns
    -------
    model : SimulationChoiceModel
        The model after ``set_simulation_params`` has been applied.
    """
    model_name = config_filename.split('.')[0]
    model = SimulationChoiceModel.from_yaml(
        str_or_buffer=misc.config(config_filename))

    merge_tables = model_attributes.get('merge_tables')
    agent_units = model_attributes.get('agent_units')

    # alternatives_id_name is only forwarded when the yaml did not already
    # define a choice column; otherwise None is passed.
    choice_column = None
    if model.choice_column is None and \
            'alternatives_id_name' in model_attributes:
        choice_column = model_attributes['alternatives_id_name']

    model.set_simulation_params(model_name,
                                model_attributes['supply_variable'],
                                model_attributes['vacant_variable'],
                                model_attributes['agents_name'],
                                model_attributes['alternatives_name'],
                                choice_column=choice_column,
                                merge_tables=merge_tables,
                                agent_units=agent_units)
    return model
def hedonic_simulate(df, cfgname, outdf, outfname):
    """
    Predict prices/rents with a hedonic model and write them to ``outdf``.

    Parameters
    ----------
    df : DataFrame
        The dataframe which contains the columns to use for the prediction.
    cfgname : string
        The name of the yaml config file which describes the hedonic model.
    outdf : DataFrame
        The dataframe to write the simulated price/rent to (modified in
        place).
    outfname : string
        The column name to write the simulated price/rent to.

    Raises
    ------
    ValueError
        If the config's ``model_type`` is neither ``"regression"`` nor
        ``"segmented_regression"``.
    """
    print("Running hedonic simulation\n")
    cfg = misc.config(cfgname)

    # NOTE(review): yaml.load without an explicit Loader can construct
    # arbitrary objects; acceptable only for trusted config files.
    with open(cfg) as f:
        model_type = yaml.load(f)["model_type"]

    if model_type == "regression":
        hm = RegressionModel.from_yaml(str_or_buffer=cfg)
    elif model_type == "segmented_regression":
        hm = SegmentedRegressionModel.from_yaml(str_or_buffer=cfg)
        hm.min_segment_size = 10
    else:
        # previously this fell through to a NameError on the unbound `hm`
        raise ValueError(
            "Unsupported model_type %r in %s" % (model_type, cfg))

    price_or_rent = hm.predict(df)
    print(price_or_rent.describe())
    outdf.loc[price_or_rent.index.values, outfname] = price_or_rent
def work_at_home_simulate(cfg, choosers, join_tbls):
    """
    Predict with the binary work-at-home choice model.

    Parameters
    ----------
    cfg : string
        Name of the yaml config file describing the model.
    choosers : DataFrameWrapper
        The agents making the choice.
    join_tbls : list of strings
        Tables joined to the choosers by ``to_frame``.

    Returns
    -------
    The result of ``BinaryDiscreteChoiceModel.predict_from_cfg``.
    """
    config_path = misc.config(cfg)
    chooser_frame = to_frame(choosers, join_tbls, config_path)
    return BinaryDiscreteChoiceModel.predict_from_cfg(chooser_frame,
                                                      config_path)
def lcm_simulate(cfg, choosers, zones, counties, out_fname):
    """
    Simulate the location choices for the specified choosers, with zones
    (joined to counties) as the alternatives.

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model.
    choosers :
        Table of agents doing the choosing.  It is converted with
        ``to_frame`` (requesting ``out_fname`` and ``'employees'`` as
        additional columns) and updated in place through
        ``update_col_from_series``.
    zones, counties :
        Tables combined by ``to_frame`` into the alternatives; the
        ``'county_id'`` column is requested as an additional column.
    out_fname : string
        The column name to write the simulated location to.  Choosers whose
        current value is -1 are treated as movers.

    Notes
    -----
    The choosers' ``county_id`` is overwritten from the orca table
    ``'updated_hh'`` before prediction.
    """
    cfg = misc.config(cfg)

    #choosers_df = to_frame([choosers, buildings, parcels, zones], cfg, additional_columns=chooser_cols)
    #TODO add join parameters to orca.merge_tables
    choosers_df = to_frame([choosers], cfg, additional_columns=[out_fname, 'employees'])

    locations_df = to_frame([zones, counties], cfg, additional_columns=['county_id'])
    # update choosers_df county_id to match that of transition model
    choosers_df.loc[:, 'county_id'] = orca.get_table('updated_hh').county_id
    print "There are {0} households".format( len(choosers_df) )

    # only choosers currently flagged with -1 take part in the choice
    movers = choosers_df[choosers_df[out_fname] == -1]

    new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, locations_df, cfg)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    #new_buildings = pd.Series(units.loc[new_units.values][out_fname].values,
    #                          index=new_units.index)
    # new_bldg_frame = pd.DataFrame(index= new_units.groupby(level=0).first().index)
    # new_bldg_frame.loc[:, 'building_id'] = new_units.groupby(level=0).first().values
    # orca.add_table('new_buildings_emp', new_bldg_frame)

    # diagnostic: number of assignments per county
    print locations_df.county_id.loc[new_units].value_counts()
    # keep only the first assignment per chooser index value
    choosers.update_col_from_series(out_fname, new_units.groupby(level=0).first())
    _print_number_unplaced(choosers, out_fname)

    # NOTE(review): out_table (assignments counted per chosen location) is
    # built but neither returned nor registered within this function --
    # confirm whether a return / orca.add_table call is missing.
    out = new_units.value_counts()
    out_table = pd.DataFrame(index=out.index)
    out_table.index.name = "zone_id"
    out_table.loc[:, "hh_demand"] = out
def run_feasibility(parcels, parcel_price_callback,
                    parcel_use_allowed_callback, pipeline=False,
                    cfg=None, **kwargs):
    """
    Execute development feasibility on all development sites

    Parameters
    ----------
    parcels : DataFrame Wrapper
        The data frame wrapper for the parcel data
    parcel_price_callback : function
        Called as ``parcel_price_callback(use, pf)`` for each use of the
        pro forma; returns a series with index as parcel_id and value as
        yearly_rent
    parcel_use_allowed_callback : function
        Called as ``parcel_use_allowed_callback(form)`` for each form of the
        pro forma; returns a series with index as parcel_id and value and
        boolean whether the form is allowed on the parcel
    pipeline : bool, optional
        If True, removes parcels from consideration if already in dev_sites
        table
    cfg : str, optional
        The name of the yaml file to read pro forma configurations from.
        When omitted, the pro forma is built with
        ``SqFtProForma.from_defaults()``.
    **kwargs
        Forwarded to ``update_sqftproforma``.

    Returns
    -------
    feasibility : DataFrame
        Long format (one row per proposal, with ``form`` and
        ``feasibility_id`` columns) when ``pf.proposals_to_keep > 1``,
        otherwise wide format with one column group per form.  The same
        frame is registered as the orca table ``'feasibility'``; in the long
        format case ``'feasibility_bt'`` is registered as well.
    """
    # Only resolve the path when a config name was given; resolving None
    # made the from_defaults() branch below unreachable.
    cfg = misc.config(cfg) if cfg else None

    # Create default SqFtProForma
    pf = (sqftproforma.SqFtProForma.from_yaml(str_or_buffer=cfg)
          if cfg else sqftproforma.SqFtProForma.from_defaults())
    # Update default values using templates and store
    pf = update_sqftproforma(pf, cfg, **kwargs)
    orca.add_injectable("pf_config", pf)

    sites = (pl.remove_pipelined_sites(parcels) if pipeline
             else parcels.to_frame(parcels.local_columns))
    #df = apply_parcel_callbacks(sites, parcel_price_callback,
    #                            pf, **kwargs)

    # compute price for each use
    df = sites
    for use in pf.uses:
        df[use] = parcel_price_callback(use, pf)

    #feasibility = lookup_by_form(df, parcel_use_allowed_callback, pf, **kwargs)

    print("Describe of the yearly rent by use")
    print(df[pf.uses].describe())

    # Computing actual feasibility
    d = {}
    forms = pf.forms_to_test or pf.forms
    for form in forms:
        print("Computing feasibility for form %s" % form)
        #if parcel_id_col is not None:
        #    parcels = df[parcel_id_col].unique()
        #    allowed = (parcel_use_allowed_callback(form).loc[parcels])
        #    newdf = df.loc[misc.reindex(allowed, df.parcel_id)]
        #else:
        allowed = parcel_use_allowed_callback(form).loc[df.index]
        newdf = df[allowed]

        # Core function - computes profitability
        d[form] = pf.lookup(form, newdf, only_built = pf.only_built,
                            pass_through = pf.pass_through)

    # Collect results
    if pf.proposals_to_keep > 1:
        # feasibility is in long format
        form_feas = []
        for form_name in d.keys():
            df_feas_form = d[form_name]
            df_feas_form['form'] = form_name
            form_feas.append(df_feas_form)

        feasibility = pd.concat(form_feas, sort=False)
        if pf.percent_of_max_profit > 0:
            # keep only proposals within the configured percentage of the
            # best profit per (parcel, form)
            feasibility['max_profit_parcel'] = feasibility.groupby([feasibility.index, 'form'])['max_profit'].transform(max)
            feasibility['ratio'] = feasibility.max_profit/feasibility.max_profit_parcel
            feasibility = feasibility[feasibility.ratio >= pf.percent_of_max_profit / 100.]
            feasibility.drop(['max_profit_parcel', 'ratio'], axis=1, inplace = True)
        feasibility.index.name = 'parcel_id'
        # add attribute that enumerates proposals (can be used as a unique index)
        feasibility["feasibility_id"] = np.arange(1, len(feasibility)+1, dtype = "int32")
        # create a dataset with disaggregated sqft by building type
        feas_bt = pd.merge(feasibility.loc[:, ["form", "feasibility_id", "residential_sqft", "non_residential_sqft"]], pf.forms_df, left_on = "form", right_index = True)
        feas_bt.set_index(['form'], append = True, inplace = True)
        feas_bt[pf.uses[pf.residential_uses.values == 1]] = feas_bt[pf.uses[pf.residential_uses.values == 1]].multiply(feas_bt.residential_sqft, axis = "index")
        feas_bt[pf.uses[pf.residential_uses.values == 0]] = feas_bt[pf.uses[pf.residential_uses.values == 0]].multiply(feas_bt.non_residential_sqft, axis = "index")
        orca.add_table('feasibility_bt', feas_bt)
    else:
        # feasibility is in wide format
        feasibility = pd.concat(d.values(), keys = d.keys(), axis=1)

    orca.add_table('feasibility', feasibility)
    return feasibility
def lcm_simulate(cfg, choosers, buildings, join_tbls, out_fname,
                 supply_fname, vacant_fname,
                 enable_supply_correction=None):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model
    choosers : DataFrameWrapper
        A dataframe of agents doing the choosing.  Choosers whose
        ``out_fname`` value is -1 are treated as movers; the table is
        updated in place through ``update_col_from_series``.
    buildings : DataFrameWrapper
        A dataframe of buildings which the choosers are locating in and
        which have a supply
    join_tbls : list of strings
        A list of land use dataframes to give neighborhood info around the
        buildings - will be joined to the buildings using existing
        broadcasts.
    out_fname : string
        The column name to write the simulated location to
    supply_fname : string
        The string in the buildings table that indicates the amount of
        available units there are for choosers, vacant or not
    vacant_fname : string
        The string in the buildings table that indicates the amount of
        vacant units there will be for choosers
    enable_supply_correction : Python dict
        Should contain keys "price_col" and "submarket_col" which are set to
        the column names in buildings which contain the column for prices
        and an identifier which segments buildings into submarkets.  The
        optional keys read here are "warm_start" (raises
        NotImplementedError when True), "multiplier_func" (name of an orca
        injectable), "kwargs" (forwarded to ``supply_and_demand``),
        "submarket_table", "clip_final_price_low" and
        "clip_final_price_high".
    """
    cfg = misc.config(cfg)

    choosers_df = to_frame(choosers, [], cfg, additional_columns=[out_fname])

    # the supply-correction columns must be present on the alternatives
    additional_columns = [supply_fname, vacant_fname]
    if enable_supply_correction is not None and \
            "submarket_col" in enable_supply_correction:
        additional_columns += [enable_supply_correction["submarket_col"]]
    if enable_supply_correction is not None and \
            "price_col" in enable_supply_correction:
        additional_columns += [enable_supply_correction["price_col"]]
    locations_df = to_frame(buildings, join_tbls, cfg,
                            additional_columns=additional_columns)

    available_units = buildings[supply_fname]
    vacant_units = buildings[vacant_fname]

    print "There are %d total available units" % available_units.sum()
    print " and %d total choosers" % len(choosers)
    print " but there are %d overfull buildings" % \
        len(vacant_units[vacant_units < 0])

    vacant_units = vacant_units[vacant_units > 0]

    # sometimes there are vacant units for buildings that are not in the
    # locations_df, which happens for reasons explained in the warning below
    # (one entry per vacant unit: each building id is repeated by its count)
    indexes = np.repeat(vacant_units.index.values,
                        vacant_units.values.astype('int'))
    isin = pd.Series(indexes).isin(locations_df.index)
    missing = len(isin[isin == False])
    indexes = indexes[isin.values]
    units = locations_df.loc[indexes].reset_index()
    check_nas(units)

    print " for a total of %d temporarily empty units" % vacant_units.sum()
    print " in %d buildings total in the region" % len(vacant_units)

    if missing > 0:
        print "WARNING: %d indexes aren't found in the locations df -" % \
            missing
        print " this is usually because of a few records that don't join "
        print " correctly between the locations df and the aggregations tables"

    movers = choosers_df[choosers_df[out_fname] == -1]
    print "There are %d total movers for this LCM" % len(movers)

    if enable_supply_correction is not None:
        assert isinstance(enable_supply_correction, dict)
        assert "price_col" in enable_supply_correction
        price_col = enable_supply_correction["price_col"]
        assert "submarket_col" in enable_supply_correction
        submarket_col = enable_supply_correction["submarket_col"]

        lcm = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)

        if enable_supply_correction.get("warm_start", False) is True:
            raise NotImplementedError()

        # multiplier_func is configured by name and resolved through orca
        multiplier_func = enable_supply_correction.get("multiplier_func", None)
        if multiplier_func is not None:
            multiplier_func = orca.get_injectable(multiplier_func)

        kwargs = enable_supply_correction.get('kwargs', {})
        new_prices, submarkets_ratios = supply_and_demand(
            lcm,
            movers,
            units,
            submarket_col,
            price_col,
            base_multiplier=None,
            multiplier_func=multiplier_func,
            **kwargs)

        # we will only get back new prices for those alternatives
        # that pass the filter - might need to specify the table in
        # order to get the complete index of possible submarkets
        submarket_table = enable_supply_correction.get("submarket_table", None)
        if submarket_table is not None:
            submarkets_ratios = submarkets_ratios.reindex(
                orca.get_table(submarket_table).index).fillna(1)
            # write final shifters to the submarket_table for use in debugging
            orca.get_table(
                submarket_table)["price_shifters"] = submarkets_ratios

        print "Running supply and demand"
        print "Simulated Prices"
        print buildings[price_col].describe()
        print "Submarket Price Shifters"
        print submarkets_ratios.describe()
        # we want new prices on the buildings, not on the units, so apply
        # shifters directly to buildings and ignore unit prices
        # (the unadjusted price is preserved in a "<price_col>_hedonic" column)
        orca.add_column(buildings.name,
                        price_col + "_hedonic", buildings[price_col])
        new_prices = buildings[price_col] * \
            submarkets_ratios.loc[buildings[submarket_col]].values
        buildings.update_col_from_series(price_col, new_prices)
        print "Adjusted Prices"
        print buildings[price_col].describe()

    if len(movers) > vacant_units.sum():
        print "WARNING: Not enough locations for movers"
        print " reducing locations to size of movers for performance gain"
        movers = movers.head(vacant_units.sum())

    new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, units, cfg)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    new_buildings = pd.Series(units.loc[new_units.values][out_fname].values,
                              index=new_units.index)

    choosers.update_col_from_series(out_fname, new_buildings)
    _print_number_unplaced(choosers, out_fname)

    if enable_supply_correction is not None:
        # optionally clamp the adjusted prices to configured bounds
        new_prices = buildings[price_col]
        if "clip_final_price_low" in enable_supply_correction:
            new_prices = new_prices.clip(
                lower=enable_supply_correction["clip_final_price_low"])
        if "clip_final_price_high" in enable_supply_correction:
            new_prices = new_prices.clip(
                upper=enable_supply_correction["clip_final_price_high"])
        buildings.update_col_from_series(price_col, new_prices)

    # re-read the vacancy column to report the state after placement
    vacant_units = buildings[vacant_fname]
    print " and there are now %d empty units" % vacant_units.sum()
    print " and %d overfull buildings" % len(vacant_units[vacant_units < 0])
def lcm_simulate(cfg, choosers, buildings, nodes, out_fname,
                 supply_fname, vacant_fname):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model.
    choosers :
        Table of agents doing the choosing.  Choosers whose ``out_fname``
        value is -1 are treated as movers; the table is updated in place
        through ``update_col_from_series``.
    buildings :
        Table of buildings which the choosers are locating in and which
        have a supply.
    nodes :
        A land use dataset to give neighborhood info around the buildings -
        will be joined to the buildings.
    out_fname : string
        The column name to write the simulated location to.
    supply_fname : string
        The string in the buildings table that indicates the amount of
        available units there are for choosers, vacant or not.
    vacant_fname : string
        The string in the buildings table that indicates the amount of
        vacant units there will be for choosers.
    """
    config_path = misc.config(cfg)

    chooser_frame = to_frame([choosers], config_path,
                             additional_columns=[out_fname])
    location_frame = to_frame([buildings, nodes], config_path,
                              [supply_fname, vacant_fname])

    supply = buildings[supply_fname]
    vacancy = buildings[vacant_fname]

    print("There are %d total available units" % supply.sum())
    print(" and %d total choosers" % len(choosers))
    print(" but there are %d overfull buildings" %
          len(vacancy[vacancy < 0]))

    # one alternative row per vacant unit: repeat each building id by its
    # (positive) vacancy count
    vacancy = vacancy[vacancy > 0]
    repeated_ids = np.repeat(vacancy.index.values,
                             vacancy.values.astype('int'))
    units = location_frame.loc[repeated_ids].reset_index()

    print(" for a total of %d temporarily empty units" % vacancy.sum())
    print(" in %d buildings total in the region" % len(vacancy))

    movers = chooser_frame[chooser_frame[out_fname] == -1]

    if len(movers) > vacancy.sum():
        print("WARNING: Not enough locations for movers")
        print(" reducing locations to size of movers for performance gain")
        movers = movers.head(vacancy.sum())

    model_cls = yaml_to_class(config_path)
    new_units, _ = model_cls.predict_from_cfg(movers, units, config_path)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    chosen_rows = units.loc[new_units.values]
    new_buildings = pd.Series(chosen_rows[out_fname].values,
                              index=new_units.index)

    choosers.update_col_from_series(out_fname, new_buildings)
    _print_number_unplaced(choosers, out_fname)

    # re-read the vacancy column to report the state after placement
    vacancy = buildings[vacant_fname]
    print(" and there are now %d empty units" % vacancy.sum())
    print(" and %d overfull buildings" % len(vacancy[vacancy < 0]))
def hlcm_luz_simulate(households, buildings, aggregations):
    """
    Run the household location choice model separately for each LUZ.

    Movers (households whose ``building_id`` is -1) are grouped by their
    ``base_luz`` value; each group chooses only among vacant units of
    buildings whose ``luz_id_buildings`` equals that value.  The model is
    read from ``hlcm_luz.yaml`` and results are written to the
    ``building_id`` column of ``households``.

    Parameters
    ----------
    households :
        Table of choosers; updated in place through
        ``update_col_from_series``.
    buildings :
        Table of buildings providing ``residential_units``,
        ``vacant_residential_units`` and ``luz_id_buildings``.
    aggregations :
        Tables joined to the buildings by ``utils.to_frame``.
    """
    out_fname = "building_id"
    supply_fname = "residential_units"
    vacant_fname = "vacant_residential_units"
    cfg = misc.config("hlcm_luz.yaml")

    # Regional choosers
    choosers_df = utils.to_frame(households, [], cfg,
                                 additional_columns=[out_fname, 'base_luz'])
    movers = choosers_df[choosers_df[out_fname] == -1]
    print("There are %d total movers for this LCM" % len(movers))

    # Regional alternatives
    locations_df = utils.to_frame(
        buildings, aggregations, cfg,
        additional_columns=[supply_fname, vacant_fname, 'luz_id_buildings'])
    vacant_buildings = buildings.to_frame(
        columns=[vacant_fname, 'luz_id_buildings'])
    vacant_buildings = vacant_buildings[vacant_buildings[vacant_fname] > 0]
    regional_vacancy = vacant_buildings[vacant_fname]
    building_luz = vacant_buildings.luz_id_buildings

    for luz in np.unique(movers.base_luz):
        print("HLCM for LUZ %s" % luz)

        luz_movers = movers[movers.base_luz == luz]
        luz_locations = locations_df[locations_df.luz_id_buildings == luz]
        luz_supply = \
            buildings[supply_fname][buildings.luz_id_buildings == luz]
        luz_vacancy = regional_vacancy[building_luz == luz]

        print("There are %d total available units" % luz_supply.sum())
        print(" and %d total choosers" % len(luz_movers))
        # NOTE(review): luz_vacancy was already filtered to > 0 above, so
        # this count appears to always be zero -- confirm intent.
        print(" but there are %d overfull buildings" %
              len(luz_vacancy[luz_vacancy < 0]))

        # one alternative row per vacant unit
        repeated_ids = np.repeat(luz_vacancy.index.values,
                                 luz_vacancy.values.astype('int'))
        units = luz_locations.loc[repeated_ids].reset_index()
        utils.check_nas(units)

        print(" for a total of %d temporarily empty units" %
              luz_vacancy.sum())
        print(" in %d buildings total in the LUZ" % len(luz_vacancy))

        if len(luz_movers) > luz_vacancy.sum():
            print("WARNING: Not enough locations for movers")
            print(" reducing locations to size of movers for performance gain")
            luz_movers = luz_movers.head(luz_vacancy.sum())

        model_cls = utils.yaml_to_class(cfg)
        new_units, _ = model_cls.predict_from_cfg(luz_movers, units, cfg)

        # new_units returns nans when there aren't enough units,
        # get rid of them and they'll stay as -1s
        new_units = new_units.dropna()

        # go from units back to buildings
        chosen_rows = units.loc[new_units.values]
        new_buildings = pd.Series(chosen_rows[out_fname].values,
                                  index=new_units.index)

        households.update_col_from_series(out_fname, new_buildings)
        utils._print_number_unplaced(households, out_fname)
def lcm_simulate(cfg, choosers, buildings, nodes, out_fname,
                 supply_fname, vacant_fname):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model.
    choosers : table wrapper
        The agents doing the choosing.  Must support
        ``update_col_from_series``.
    buildings : table wrapper
        The buildings which the choosers are locating in and which have a
        supply.
    nodes : table wrapper
        A land use dataset to give neighborhood info around the buildings -
        will be joined to the buildings.
    out_fname : string
        The column name to write the simulated location to.  Choosers
        whose value in this column is -1 are the ones that get placed.
    supply_fname : string
        The string in the buildings table that indicates the amount of
        available units there are for choosers, vacant or not.
    vacant_fname : string
        The string in the buildings table that indicates the amount of vacant
        units there will be for choosers.
    """
    cfg = misc.config(cfg)

    choosers_df = to_frame([choosers], cfg, additional_columns=[out_fname])
    locations_df = to_frame([buildings, nodes], cfg,
                            [supply_fname, vacant_fname])

    available_units = buildings[supply_fname]
    vacant_units = buildings[vacant_fname]

    print("There are %d total available units" % available_units.sum())
    print(" and %d total choosers" % len(choosers))
    print(" but there are %d overfull buildings" %
          len(vacant_units[vacant_units < 0]))

    vacant_units = vacant_units[vacant_units > 0]

    # One row per vacant unit: repeat each building id by its vacancy.
    indexes = np.repeat(vacant_units.index.values,
                        vacant_units.values.astype('int'))
    # Buildings can be absent from the joined locations frame; looking
    # those ids up with .loc would fail or yield all-NaN rows, so keep
    # only the ids that are actually present.
    found = pd.Series(indexes).isin(locations_df.index)
    missing = int((~found).sum())
    indexes = indexes[found.values]
    units = locations_df.loc[indexes].reset_index()

    print(" for a total of %d temporarily empty units" % vacant_units.sum())
    print(" in %d buildings total in the region" % len(vacant_units))

    if missing > 0:
        print("WARNING: %d indexes aren't found in the locations df" %
              missing)

    movers = choosers_df[choosers_df[out_fname] == -1]

    if len(movers) > vacant_units.sum():
        # NOTE: despite the wording of the message it is the movers that
        # are truncated to the number of vacant units.
        print("WARNING: Not enough locations for movers")
        print(" reducing locations to size of movers for performance gain")
        # The vacancy column may be float, so its sum may be too; head()
        # needs an integer row count.
        movers = movers.head(int(vacant_units.sum()))

    new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, units, cfg)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    new_buildings = pd.Series(units.loc[new_units.values][out_fname].values,
                              index=new_units.index)

    choosers.update_col_from_series(out_fname, new_buildings)
    _print_number_unplaced(choosers, out_fname)

    # Re-read the vacancy column so the report reflects the new placements.
    vacant_units = buildings[vacant_fname]
    print(" and there are now %d empty units" % vacant_units.sum())
    print(" and %d overfull buildings" % len(vacant_units[vacant_units < 0]))
def hedonic_simulate(cfg, tbl, nodes, out_fname):
    """
    Run a hedonic model and store its predictions on ``tbl``.

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the hedonic
        model.
    tbl : table wrapper
        The table the predictions are written to; also the first table
        handed to ``to_frame``.
    nodes : table wrapper
        The second table handed to ``to_frame`` together with ``tbl``.
    out_fname : string
        The column of ``tbl`` that receives the predicted values.
    """
    config_path = misc.config(cfg)
    model_input = to_frame([tbl, nodes], config_path)
    model_cls = yaml_to_class(config_path)
    # predict_from_cfg returns a pair; only the predictions are used here.
    predictions, _unused = model_cls.predict_from_cfg(model_input,
                                                      config_path)
    tbl.update_col_from_series(out_fname, predictions)
def hedonic_estimate(cfg, tbl, nodes):
    """
    Estimate a hedonic model from the given tables.

    Parameters
    ----------
    cfg : string
        The name of the yaml config file describing the hedonic model.
    tbl : table wrapper
        The first table handed to ``to_frame``.
    nodes : table wrapper
        The second table handed to ``to_frame`` together with ``tbl``.

    Returns
    -------
    Whatever the model class's ``fit_from_cfg`` returns.
    """
    config_path = misc.config(cfg)
    estimation_data = to_frame([tbl, nodes], config_path)
    model_cls = yaml_to_class(config_path)
    return model_cls.fit_from_cfg(estimation_data, config_path)
def hlcm_luz_simulate(households, buildings, aggregations):
    """
    Simulate household location choices separately for each LUZ.

    Households whose ``building_id`` is -1 are grouped by ``base_luz``.
    Each group chooses only among the vacant residential units of buildings
    whose ``luz_id_buildings`` equals that LUZ, using the model configured
    in ``hlcm_luz.yaml``.  The chosen building ids are written back to the
    ``building_id`` column of ``households``.

    Parameters
    ----------
    households : table wrapper
        The agents doing the choosing.  Must provide ``building_id`` and
        ``base_luz`` and support ``update_col_from_series``.
    buildings : table wrapper
        The buildings being chosen.  Must provide ``residential_units``,
        ``vacant_residential_units`` and ``luz_id_buildings`` and support
        ``to_frame``.
    aggregations : list
        Tables to join to the buildings when the alternatives are built;
        passed through to ``utils.to_frame``.
    """
    out_fname = "building_id"
    supply_fname = "residential_units"
    vacant_fname = "vacant_residential_units"
    cfg = misc.config("hlcm_luz.yaml")

    # Regional choosers
    choosers_df = utils.to_frame(households, [], cfg,
                                 additional_columns=[out_fname, 'base_luz'])
    movers = choosers_df[choosers_df[out_fname] == -1]
    print("There are %d total movers for this LCM" % len(movers))

    # Regional alternatives
    additional_columns = [supply_fname, vacant_fname, 'luz_id_buildings']
    locations_df = utils.to_frame(buildings, aggregations, cfg,
                                  additional_columns=additional_columns)

    # Fetch the building columns once, before the loop, instead of asking
    # the wrapper for them again on every LUZ.
    buildings_df = buildings.to_frame(
        columns=[supply_fname, vacant_fname, 'luz_id_buildings'])

    for luz in np.unique(movers.base_luz):
        print("HLCM for LUZ %s" % luz)

        movers_luz = movers[movers.base_luz == luz]
        locations_df_luz = locations_df[locations_df.luz_id_buildings == luz]

        in_luz = buildings_df.luz_id_buildings == luz
        available_units = buildings_df.loc[in_luz, supply_fname]
        vacancy_in_luz = buildings_df.loc[in_luz, vacant_fname]
        # The overfull count has to come from the unfiltered vacancies:
        # once only positive values are kept it would always be zero.
        overfull = vacancy_in_luz < 0
        vacant_units = vacancy_in_luz[vacancy_in_luz > 0]

        print("There are %d total available units" % available_units.sum())
        print(" and %d total choosers" % len(movers_luz))
        print(" but there are %d overfull buildings" % int(overfull.sum()))

        # One row per vacant unit: repeat each building id by its vacancy.
        indexes = np.repeat(vacant_units.index.values,
                            vacant_units.values.astype('int'))
        units = locations_df_luz.loc[indexes].reset_index()
        utils.check_nas(units)

        print(" for a total of %d temporarily empty units" %
              vacant_units.sum())
        print(" in %d buildings total in the LUZ" % len(vacant_units))

        if len(movers_luz) > vacant_units.sum():
            # NOTE: despite the wording of the message it is the movers
            # that are truncated to the number of vacant units.
            print("WARNING: Not enough locations for movers")
            print(" reducing locations to size of movers for performance gain")
            # The vacancy column may be float, so its sum may be too;
            # head() needs an integer row count.
            movers_luz = movers_luz.head(int(vacant_units.sum()))

        new_units, _ = utils.yaml_to_class(cfg).predict_from_cfg(
            movers_luz, units, cfg)

        # new_units returns nans when there aren't enough units,
        # get rid of them and they'll stay as -1s
        new_units = new_units.dropna()

        # go from units back to buildings
        new_buildings = pd.Series(
            units.loc[new_units.values][out_fname].values,
            index=new_units.index)

        households.update_col_from_series(out_fname, new_buildings)
        utils._print_number_unplaced(households, out_fname)
def drop_predict_filters_from_yaml(in_yaml_name, out_yaml_name):
    """
    Copy a model config with its ``alts_predict_filters`` entry cleared.

    Parameters
    ----------
    in_yaml_name : string
        The name of the yaml config file to read (resolved with
        ``misc.config``).
    out_yaml_name : string
        The name of the yaml config file to write (resolved with
        ``misc.config``).  It receives the input config with
        ``alts_predict_filters`` set to None.
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects; this
    # is only appropriate while the config files are trusted project files.
    # Context managers make sure both handles are closed, even on error.
    with open(misc.config(in_yaml_name)) as in_file:
        cfg = yaml.load(in_file)

    cfg["alts_predict_filters"] = None

    with open(misc.config(out_yaml_name), "w") as out_file:
        out_file.write(yaml.dump(cfg))
def hedonic_simulate(cfg, buildings, parcels, zones, out_fname):
    """
    Run a hedonic model and store its predictions on ``buildings``.

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the hedonic
        model.
    buildings : table wrapper
        The table the predictions are written to; also the first table
        handed to ``to_frame``.
    parcels : table wrapper
        The second table handed to ``to_frame``.
    zones : table wrapper
        The third table handed to ``to_frame``.
    out_fname : string
        The column of ``buildings`` that receives the predicted values.
    """
    config_path = misc.config(cfg)
    model_input = to_frame([buildings, parcels, zones], config_path)
    model_cls = yaml_to_class(config_path)
    # predict_from_cfg returns a pair; only the predictions are used here.
    predictions, _unused = model_cls.predict_from_cfg(model_input,
                                                      config_path)
    buildings.update_col_from_series(out_fname, predictions)
def lcm_simulate(cfg, choosers, buildings, join_tbls, out_fname,
                 supply_fname, vacant_fname,
                 enable_supply_correction=None):
    """
    Simulate the location choices for the specified choosers

    Parameters
    ----------
    cfg : string
        The name of the yaml config file from which to read the location
        choice model
    choosers : DataFrameWrapper
        A dataframe of agents doing the choosing
    buildings : DataFrameWrapper
        A dataframe of buildings which the choosers are locating in and which
        have a supply
    join_tbls : list of strings
        A list of land use dataframes to give neighborhood info around the
        buildings - will be joined to the buildings using existing broadcasts.
    out_fname : string
        The column name to write the simulated location to.  Choosers whose
        value in this column is -1 are the ones that get placed.
    supply_fname : string
        The string in the buildings table that indicates the amount of
        available units there are for choosers, vacant or not
    vacant_fname : string
        The string in the buildings table that indicates the amount of vacant
        units there will be for choosers
    enable_supply_correction : Python dict, optional
        Should contain keys "price_col" and "submarket_col" which are set to
        the column names in buildings which contain the column for prices and
        an identifier which segments buildings into submarkets.  Keys that
        are read when present: "multiplier_func" (name of an orca
        injectable), "kwargs" (extra keyword arguments for
        ``supply_and_demand``), "submarket_table" (name of the orca table
        whose index the price shifters are reindexed to),
        "clip_final_price_low" and "clip_final_price_high" (bounds applied
        to the final prices).  "warm_start" set to True is not implemented.

    Raises
    ------
    NotImplementedError
        If ``enable_supply_correction["warm_start"]`` is True.
    """
    cfg = misc.config(cfg)

    choosers_df = to_frame(choosers, [], cfg, additional_columns=[out_fname])

    # The supply correction needs the submarket and price columns on the
    # alternatives as well.
    additional_columns = [supply_fname, vacant_fname]
    if enable_supply_correction is not None:
        if "submarket_col" in enable_supply_correction:
            additional_columns += [enable_supply_correction["submarket_col"]]
        if "price_col" in enable_supply_correction:
            additional_columns += [enable_supply_correction["price_col"]]
    locations_df = to_frame(buildings, join_tbls, cfg,
                            additional_columns=additional_columns)

    available_units = buildings[supply_fname]
    vacant_units = buildings[vacant_fname]

    print("There are %d total available units" % available_units.sum())
    print(" and %d total choosers" % len(choosers))
    print(" but there are %d overfull buildings" %
          len(vacant_units[vacant_units < 0]))

    vacant_units = vacant_units[vacant_units > 0]

    # sometimes there are vacant units for buildings that are not in the
    # locations_df, which happens for reasons explained in the warning below
    indexes = np.repeat(vacant_units.index.values,
                        vacant_units.values.astype('int'))
    isin = pd.Series(indexes).isin(locations_df.index)
    missing = int((~isin).sum())
    indexes = indexes[isin.values]
    units = locations_df.loc[indexes].reset_index()
    check_nas(units)

    print(" for a total of %d temporarily empty units" % vacant_units.sum())
    print(" in %d buildings total in the region" % len(vacant_units))

    if missing > 0:
        print("WARNING: %d indexes aren't found in the locations df -" %
              missing)
        print(" this is usually because of a few records that don't join ")
        print(" correctly between the locations df and the aggregations tables")

    movers = choosers_df[choosers_df[out_fname] == -1]
    print("There are %d total movers for this LCM" % len(movers))

    if enable_supply_correction is not None:
        assert isinstance(enable_supply_correction, dict)
        assert "price_col" in enable_supply_correction
        price_col = enable_supply_correction["price_col"]
        assert "submarket_col" in enable_supply_correction
        submarket_col = enable_supply_correction["submarket_col"]

        lcm = yaml_to_class(cfg).from_yaml(str_or_buffer=cfg)

        if enable_supply_correction.get("warm_start", False) is True:
            raise NotImplementedError()

        # The config names the multiplier function; resolve the name to the
        # injectable registered with orca.
        multiplier_func = enable_supply_correction.get("multiplier_func", None)
        if multiplier_func is not None:
            multiplier_func = orca.get_injectable(multiplier_func)

        kwargs = enable_supply_correction.get('kwargs', {})
        new_prices, submarkets_ratios = supply_and_demand(
            lcm,
            movers,
            units,
            submarket_col,
            price_col,
            base_multiplier=None,
            multiplier_func=multiplier_func,
            **kwargs)

        # we will only get back new prices for those alternatives
        # that pass the filter - might need to specify the table in
        # order to get the complete index of possible submarkets
        submarket_table = enable_supply_correction.get("submarket_table", None)
        if submarket_table is not None:
            submarkets_ratios = submarkets_ratios.reindex(
                orca.get_table(submarket_table).index).fillna(1)
            # write final shifters to the submarket_table for use in debugging
            orca.get_table(submarket_table)["price_shifters"] = \
                submarkets_ratios

        print("Running supply and demand")
        print("Simulated Prices")
        print(buildings[price_col].describe())
        print("Submarket Price Shifters")
        print(submarkets_ratios.describe())

        # we want new prices on the buildings, not on the units, so apply
        # shifters directly to buildings and ignore unit prices
        orca.add_column(buildings.name,
                        price_col+"_hedonic", buildings[price_col])
        new_prices = buildings[price_col] * \
            submarkets_ratios.loc[buildings[submarket_col]].values
        buildings.update_col_from_series(price_col, new_prices)
        print("Adjusted Prices")
        print(buildings[price_col].describe())

    if len(movers) > vacant_units.sum():
        # NOTE: despite the wording of the message it is the movers that
        # are truncated to the number of vacant units.
        print("WARNING: Not enough locations for movers")
        print(" reducing locations to size of movers for performance gain")
        # The vacancy column may be float, so its sum may be too; head()
        # needs an integer row count.
        movers = movers.head(int(vacant_units.sum()))

    new_units, _ = yaml_to_class(cfg).predict_from_cfg(movers, units, cfg)

    # new_units returns nans when there aren't enough units,
    # get rid of them and they'll stay as -1s
    new_units = new_units.dropna()

    # go from units back to buildings
    new_buildings = pd.Series(units.loc[new_units.values][out_fname].values,
                              index=new_units.index)

    choosers.update_col_from_series(out_fname, new_buildings)
    _print_number_unplaced(choosers, out_fname)

    if enable_supply_correction is not None:
        # Optionally bound the adjusted prices before storing them again.
        new_prices = buildings[price_col]
        if "clip_final_price_low" in enable_supply_correction:
            new_prices = new_prices.clip(
                lower=enable_supply_correction["clip_final_price_low"])
        if "clip_final_price_high" in enable_supply_correction:
            new_prices = new_prices.clip(
                upper=enable_supply_correction["clip_final_price_high"])
        buildings.update_col_from_series(price_col, new_prices)

    # Re-read the vacancy column so the report reflects the new placements.
    vacant_units = buildings[vacant_fname]
    print(" and there are now %d empty units" % vacant_units.sum())
    print(" and %d overfull buildings" % len(vacant_units[vacant_units < 0]))
def create_proforma_config(proforma_settings):
    """
    Build the pro forma config from the user config and the given settings.

    Reads ``proforma_user.yaml`` (resolved with ``misc.config``) into a
    dict, passes it with ``proforma_settings`` to
    ``psrcdev.update_sqftproforma`` and hands the result to
    ``yamlio.convert_to_yaml`` with ``"proforma.yaml"`` as the target.

    Parameters
    ----------
    proforma_settings : object
        Settings forwarded unchanged to ``psrcdev.update_sqftproforma``.
    """
    user_yaml_path = misc.config("proforma_user.yaml")
    base_cfg = yamlio.yaml_to_dict(str_or_buffer=user_yaml_path)
    updated_cfg = psrcdev.update_sqftproforma(base_cfg, proforma_settings)
    yamlio.convert_to_yaml(updated_cfg, "proforma.yaml")