Example #1
def simulate(dset, config, year=None, show=True, variables=None):

    global NETWORKS
    if not NETWORKS:
        assert 'networks' in config
        netconfig = config['networks']
        assert 'filenames' in netconfig and 'factors' in netconfig and 'maxdistances' in netconfig and 'twoway' in netconfig
        impedances = netconfig[
            'impedances'] if 'impedances' in netconfig else None
        NETWORKS = Networks(
            [os.path.join(misc.data_dir(), x) for x in netconfig['filenames']],
            factors=netconfig['factors'],
            maxdistances=netconfig['maxdistances'],
            twoway=netconfig['twoway'],
            impedances=impedances)

    t1 = time.time()

    if "ind_vars" not in config: raise Exception("No ind_vars specification")
    if "var_lib" not in config:
        raise Exception("All network variables are defined in local var_lib")
    _tbl_ = pd.DataFrame(index=pd.MultiIndex.from_tuples(NETWORKS.nodeids))
    for varname in config["ind_vars"]:
        expression = config["var_lib"][varname]
        _tbl_[varname] = eval(expression).astype('float')

    if 'show' in config and config['show']: print _tbl_.describe()
    if "writetotmp" in config: dset.save_tmptbl(config["writetotmp"], _tbl_)

    print "Finished executing in %f seconds" % (time.time() - t1)
Example #2
 def fetch_networks(self,reload=True,maxdistance=30,rootdir=None,custom_impedances=None):
   if not reload: return networks.Networks(os.path.join(misc.data_dir(),'network%d.pkl'))
   
   network = networks.Networks()
   network.process_network(maxdistance,rootdir,walkminutes=1,custom_impedances=custom_impedances)
   self.networks = network
   return self.networks
Example #3
def simulate(dset,config,year=None,show=True,variables=None):

  global NETWORKS
  if not NETWORKS:
    assert 'networks' in config
    netconfig = config['networks']
    assert 'filenames' in netconfig and 'factors' in netconfig and 'maxdistances' in netconfig and 'twoway' in netconfig
    impedances = netconfig['impedances'] if 'impedances' in netconfig else None
    NETWORKS = Networks([os.path.join(misc.data_dir(),x) for x in netconfig['filenames']],
                    factors=netconfig['factors'],maxdistances=netconfig['maxdistances'],twoway=netconfig['twoway'],
                    impedances=impedances)
  
  t1 = time.time()
    
  if "ind_vars" not in config: raise Exception("No ind_vars specification")
  if "var_lib" not in config: raise Exception("All network variables are defined in local var_lib")
  _tbl_ = pd.DataFrame(index=pd.MultiIndex.from_tuples(NETWORKS.nodeids))
  for varname in config["ind_vars"]:
    expression = config["var_lib"][varname]
    _tbl_[varname] = eval(expression).astype('float')
  
  if 'show' in config and config['show']: print _tbl_.describe()
  if "writetotmp" in config: dset.save_tmptbl(config["writetotmp"],_tbl_)

  print "Finished executing in %f seconds" % (time.time()-t1)
    def estimate_elasticity(self, zones):
        dummies = pd.get_dummies(zones.county)
        zones = pd.concat([zones, dummies], axis=1)
        zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean() #use far_x because Xavier's code adds far to buildings

        #zones = zones[zones.residential_sqft_zone>0]

        #wrook = py.queen_from_shapefile('C:/users/jmartinez/documents/Test Zones/zones_prj_res2.shp')
        wqueen = py.queen_from_shapefile(os.path.join(misc.data_dir(),'shapefiles\\zones.shp'))
        w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)
        x = zones[['zonal_pop','mean_income']]
        x = x.apply(np.log1p)

        x['ln_jobs_within_30min'] = zones['ln_jobs_within_30min']
        x['zone_contains_park'] = zones['zone_contains_park']
        x['Arapahoe'] = zones['Arapahoe']
        x['Boulder'] = zones['Boulder']
        x['Broomfield'] = zones['Broomfield']
        x['Clear Creek'] = zones['Clear Creek']
        x['Denver'] = zones['Denver']
        x['Douglas'] = zones['Douglas']
        x['Elbert'] = zones['Elbert']
        x['Gilpin'] = zones['Gilpin']
        x['Jefferson'] = zones['Jefferson']
        x['Weld'] = zones['Weld']
        x=x.fillna(0)
        x = x.as_matrix()

        imat = zones[['ln_avg_nonres_unit_price_zone','avg_far']]
        imat = imat.fillna(0)
        imat = imat.as_matrix()

        yend = zones['ln_avg_unit_price_zone']
        yend = yend.fillna(0)
        yend = yend.as_matrix()
        yend = np.reshape(yend,(zones.shape[0],1))

        y = zones['residential_sqft_zone']
        y = y.fillna(0)
        y = y.apply(np.log1p)
        y = y.as_matrix()
        y = np.reshape(y,(zones.shape[0],1))


        imat_names = ['non_res_price','avg_far']
        x_names = ['zonal_pop', 'mean_income', 'ln_jobs_within_30min', 'zone_contains_park','Arapahoe','Boulder','Broomfield','Clear Creek','Denver','Douglas','Elbert','Gilpin','Jefferson','Weld']
        yend_name = ['ln_avg_unit_price_zone']
        y_name = 'residential_sqft_zone'
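        # GM_Lag: pysal's spatial two-stage least squares (spreg); the price term in
        # yend is instrumented by q (imat), w_lags=2 adds first- and second-order
        # spatial lags of the exogenous variables as instruments for the spatial lag
        # of y, and robust='white' requests heteroskedasticity-robust standard errors.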

        reg_2sls = py.spreg.twosls_sp.GM_Lag(y, x, yend=yend, q=imat, w=w, w_lags=2, robust ='white', name_x = x_names, name_q = imat_names, name_y = y_name, name_yend = yend_name)

        demand_elasticity = np.absolute(reg_2sls.betas[15])
        demand_elasticity = 1/demand_elasticity[0]
        #
        return demand_elasticity
Example #5
def data_zone_census( zones):

    data_census=pd.read_csv(os.path.join(misc.data_dir(),'census_zone.csv'))
    #del data_census['median_value']
    data=pd.merge(zones, data_census, on='zone_id', how='inner')


    #Income using census block group data
    data['median_income']=data['median_income'].astype(float)
    data['ln_inc']=np.log(data['median_income'])

    # Asked price (census)
    #data['median_value']=data['median_value'].apply(float)
    data['ln_price']=np.log(data['median_value'])

    # Race composition
    data['all races']=data['White alone'].apply(float)+ data['Black or African American alone'].apply(float)\
                     + data['American Indian and Alaska Native alone'].apply(float)+ data['Asian alone'].apply(float)\
                +data['Native Hawaiian and Other Pacific Islander alone'].apply(float)+ data['Some other race alone'].apply(float)\
                +data['two races or more'].apply(float)
    data['percent_white']=np.log(data['White alone']/data['all races'])
    data['percent_black']=data['Black or African American alone']/data['all races']
    data['percent_black2']=data['percent_black']**2
    data['ln_residential_sqft_mean2']=data['ln_residential_sqft_mean']**2


    # Creating max and min income of neighbors (can have important implications for gentrification)
    geo=pd.DataFrame(data['zonecentroid_x'])
    geo['zonecentroid_y']=data['zonecentroid_y']
    geo=np.array(geo)
    w=pysal.knnW(geo, k=10)

    n=len(geo)
    neigh_income_max=np.zeros(n)
    neigh_income_min=np.zeros(n)

    for i in range(n):
        arr=w.neighbors[i]
        zone=np.zeros(n)
        for j in arr:
            zone[j]=1

        data['neigh']=zone
        neigh_income_max[i]=data[data['neigh']==1].median_income.max()
        neigh_income_min[i]=data[data['neigh']==1].median_income.min()

    data['ln_neigh_income_max']=np.log(neigh_income_max/data['median_income'])
    data['ln_neigh_income_min']=np.log(neigh_income_min/data['median_income'])

    data=data.set_index(data['zone_id'])
    return data
Example #6
def second_stage(depvar, indvar, data, instrumented, instr, indvariv, fixedeffect):

    # Instrumentation (first stage)
    data=instrument(instrumented,  indvariv, data, instr, fixedeffect)

    # Make sure that there is no inf or nan in the RHS/LHS variables
    for varname in depvar + indvar + fixedeffect:
        data=data[np.isfinite(data[varname])]

    #data=data[data['median_value']<400000]
    # Generate dummies for categorical variables and remove one of them (to avoid multi-collinearity)
    x=pd.get_dummies(data['school_district_id'], prefix='sdis')
    del x['sdis_8']

    # Fill the right-hand side with the independent variables
    for varname in indvar:
        x[varname]=data[varname]

    # Replace the instrumented variable by its predictor from stage one
    for varname in instrumented:
        x[varname]=data[varname+'_iv']

    # Add a constant
    x['const']=1

    print x

    # Main Regression. GLM estimation using a Poisson family (it seems to work better than other families)
    mod=sm.GLM(data[depvar], x, family=sm.families.Poisson())
    result=mod.fit()


    # Store coefficients
    collist=list(x.columns.values)
    dset.store_coeff("coeff_residential",result.params.values,result.params.index)


    coeff_store_path = os.path.join(misc.data_dir(),'coeffs_res.h5')
    coeff_store = pd.HDFStore(coeff_store_path)
    coeff_store['coeffs_res'] = dset.coeffs
    coeff_store.close()

    # Predicted Prices
    data['sim_price']=result.predict()
    print result.summary()
    return data
Example #7
  def fetch_batshh(self,tenure=None):

    if USECHTS:
      batshh = pd.read_csv(os.path.join(misc.data_dir(),'bats2013MTC_household.csv')) 

      batshh = batshh[batshh['INCOM'] < 90] # remove bogus income records
      batshh['income_quartile'] = pd.qcut(batshh['INCOM'],4).labels
      batshh['HHINCOME'] = batshh['INCOM']
  
      if tenure == "sales": batshh = batshh[batshh['OWN']==1]
      elif tenure == "rent": batshh = batshh[batshh['OWN']==2]
    
      return batshh
    else: 
      batshh = self.store['batshh']

      batshh = batshh[batshh['HHINCOME'] < 16] # remove bogus income records
      batshh['income_quartile'] = pd.qcut(batshh['HHINCOME'],4).labels
  
      if tenure == "rent": batshh = batshh[batshh['TENURE']==1]
      elif tenure == "sales": batshh = batshh[batshh['TENURE']<>1]
    
      return batshh
Example #8
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table=None,
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None):

    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    #calculate mortgage payment values

    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05 / 12
    n = 360
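    # Standard amortization factor: monthly payment = price * r(1+r)^n / ((1+r)^n - 1),
    # with r = .05/12 (5% annual rate, compounded monthly) and n = 360 payments (30 years).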
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar] == -1]

    #distribute county_ids based on demography projections

    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),
                                                   'county_growth_share.csv'),
                                      index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties,
                                       movers_all.shape[0],
                                       replace=True,
                                       p=current_growth_shares)

    movers_all['county_id'] = movers_counties
    empty_units = dset.buildings[(dset.buildings.residential_units >
                                  0)].residential_units.sub(
                                      choosers.groupby('building_id').size(),
                                      fill_value=0)
    empty_units = empty_units[empty_units > 0].order(ascending=False)
    alts = alternatives.ix[np.repeat(empty_units.index.values,
                                     empty_units.values.astype('int'))]

    #create alternatives subset with mortage info
    r = .05 / 12
    n = 360

    try:
        subset_alts = alts[['unit_price_residential', 'county_id']]
    except KeyError:
        subset_alts = alts[['unit_price_residential', 'county_id_y']]
        subset_alts.rename(columns={'county_id_y': 'county_id'}, inplace=True)

    subset_alts['payment'] = alts.unit_price_residential * ((r * (1 + r)**n) /
                                                            ((1 + r)**n - 1))

    #generate probabilities
    pdf = gen_probs(dset, movers_all, agents_groupby, alts, output_names)

    #build data structures for loop

    #income_3_tenure limits
    income_limits = {
        1: 60000 / 12,
        2: 120000 / 12,
        3: dset.households.income.max() / 12,
        4: 40000 / 12,
        5: dset.households.income.max() / 12
    }
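    # Affordability screen: a unit qualifies for a segment only if its estimated
    # monthly payment is at most 33% of that segment's monthly income ceiling.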

    bool_price1 = (subset_alts.payment / income_limits[1]) <= 0.33
    bool_price2 = (subset_alts.payment / income_limits[2]) <= 0.33
    bool_price3 = (subset_alts.payment / income_limits[3]) <= 0.33
    bool_price4 = (subset_alts.payment / income_limits[4]) <= 0.33
    bool_price5 = (subset_alts.payment / income_limits[5]) <= 0.33
    d = {}

    for county in counties:
        data_list = []
        bool_counties = subset_alts.county_id == int(county)
        ids1 = subset_alts.loc[(bool_counties) & (bool_price1)].index.tolist()
        ids2 = subset_alts.loc[(bool_counties) & (bool_price2)].index.tolist()
        ids3 = subset_alts.loc[(bool_counties) & (bool_price3)].index.tolist()
        ids4 = subset_alts.loc[(bool_counties) & (bool_price4)].index.tolist()
        ids5 = subset_alts.loc[(bool_counties) & (bool_price5)].index.tolist()
        ##generate lists of probabilities
        prob1 = pdf.loc[set(ids1), 'segment1'].tolist()
        prob2 = pdf.loc[set(ids2), 'segment2'].tolist()
        prob3 = pdf.loc[set(ids3), 'segment3'].tolist()
        prob4 = pdf.loc[set(ids4), 'segment4'].tolist()
        prob5 = pdf.loc[set(ids5), 'segment5'].tolist()

        data_list.append((ids1, prob1))
        data_list.append((ids2, prob2))
        data_list.append((ids3, prob3))
        data_list.append((ids4, prob4))
        data_list.append((ids5, prob5))

        d[int(county)] = data_list

    #call placing method

    m_loop = movers_all[['income_3_tenure', 'county_id', 'building_id']]
    #m_loop = m_loop.head(5000)
    out_list = []

    from functools import partial
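    # Parallel placement: bind the per-county (ids, probabilities) lookup d to
    # apply_func, split the movers frame into 4 chunks, and map the chunks over a
    # 4-process pool; the per-chunk results are concatenated below.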
    mapfunc = partial(apply_func, d=d, out=out_list)
    p = mp.Pool(processes=4)
    split_dfs = np.array_split(m_loop, 4)
    pool_results = p.map(mapfunc, split_dfs)
    p.close()
    p.join()

    #m_loop.apply(place_households, axis=1, args=(d,out_list))
    master_list = pool_results[0] + pool_results[1] + pool_results[
        2] + pool_results[3]

    building_ids = [i[0] for i in master_list]
    household_id = [i[1] for i in master_list]

    result_frame = pd.DataFrame(columns=['household_id', 'building_id'])
    result_frame['household_id'] = household_id
    result_frame['building_id'] = building_ids
    #
    dset.households.loc[result_frame.household_id,
                        'building_id'] = result_frame['building_id'].values
    #
    #result_frame.to_csv('c:/users/jmartinez/documents/test_results.csv')

    #print out_list

    dset.households.loc[result_frame.household_id]
Example #9
 def fetch_factual(self):
   return pd.read_csv(os.path.join(misc.data_dir(),'factual_places.csv'))
Example #10
import numpy as np, pandas as pd, os
from synthicity.utils import misc
from drcog.models import dataset
dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
np.random.seed(1)
import statsmodels.api as sm
#import pygwr_kernel
import random

"""
This program estimates an hedonic model for prices of residential and non-residential buildings. The benchmark method
combines:
1/ A geographically weighted regression to account for spatial non-stationarity
2/ Poisson or Negative Binonial General Linear Model to estimate a log-linear model with heteroskedastic error terms
3/ Zone employment (later-on when the data is fixed, zone average income or household characteristics)
is instrumented with average buildings characteristics in neighboring zones.

The program is organized in four parts:
1/ Create a dataset for estimation
2/ Run the first stage least squares (average zonal employment regressed on county fixed effect and
  neighboring zones characteristics). The predicted zonal employment is used as an instrument in all following regressions
3/  Run a GLM GWR methods and obtain local hedonoc parameters.
4/ Generate average coefficient for each zone

"""

## Part 1: extract variables and build dataset for estimation
def data_estimation(dset, buildings,parcels,fars,zones,establishments, bid):
    bp=buildings
    p=parcels
    f=fars
Example #11
    def estimate_elasticity(self, zones):
        dummies = pd.get_dummies(zones.county)
        zones = pd.concat([zones, dummies], axis=1)
        zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean()  #use far_x because Xavier's code adds far to buildings

        #zones = zones[zones.residential_sqft_zone>0]

        #wrook = py.queen_from_shapefile('C:/users/jmartinez/documents/Test Zones/zones_prj_res2.shp')
        wqueen = py.queen_from_shapefile(
            os.path.join(misc.data_dir(), 'shapefiles\\zones.shp'))
        w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)
        x = zones[['zonal_pop', 'mean_income']]
        x = x.apply(np.log1p)

        x['ln_jobs_within_30min'] = zones['ln_jobs_within_30min']
        x['zone_contains_park'] = zones['zone_contains_park']
        x['Arapahoe'] = zones['Arapahoe']
        x['Boulder'] = zones['Boulder']
        x['Broomfield'] = zones['Broomfield']
        x['Clear Creek'] = zones['Clear Creek']
        x['Denver'] = zones['Denver']
        x['Douglas'] = zones['Douglas']
        x['Elbert'] = zones['Elbert']
        x['Gilpin'] = zones['Gilpin']
        x['Jefferson'] = zones['Jefferson']
        x['Weld'] = zones['Weld']
        x = x.fillna(0)
        x = x.as_matrix()

        imat = zones[['ln_avg_nonres_unit_price_zone', 'avg_far']]
        imat = imat.fillna(0)
        imat = imat.as_matrix()

        yend = zones['ln_avg_unit_price_zone']
        yend = yend.fillna(0)
        yend = yend.as_matrix()
        yend = np.reshape(yend, (zones.shape[0], 1))

        y = zones['residential_sqft_zone']
        y = y.fillna(0)
        y = y.apply(np.log1p)
        y = y.as_matrix()
        y = np.reshape(y, (zones.shape[0], 1))

        imat_names = ['non_res_price', 'avg_far']
        x_names = [
            'zonal_pop', 'mean_income', 'ln_jobs_within_30min',
            'zone_contains_park', 'Arapahoe', 'Boulder', 'Broomfield',
            'Clear Creek', 'Denver', 'Douglas', 'Elbert', 'Gilpin',
            'Jefferson', 'Weld'
        ]
        yend_name = ['ln_avg_unit_price_zone']
        y_name = 'residential_sqft_zone'
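        # GM_Lag: pysal's spatial two-stage least squares (spreg); the price term in
        # yend is instrumented by q (imat), w_lags=2 adds first- and second-order
        # spatial lags of the exogenous variables as instruments for the spatial lag
        # of y, and robust='white' requests heteroskedasticity-robust standard errors.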

        reg_2sls = py.spreg.twosls_sp.GM_Lag(y,
                                             x,
                                             yend=yend,
                                             q=imat,
                                             w=w,
                                             w_lags=2,
                                             robust='white',
                                             name_x=x_names,
                                             name_q=imat_names,
                                             name_y=y_name,
                                             name_yend=yend_name)

        demand_elasticity = np.absolute(reg_2sls.betas[15])
        demand_elasticity = 1 / demand_elasticity[0]
        #
        return demand_elasticity
Example #12
def calculate_variables(dset):

    ##PARCEL VARIABLES


    # XG: Fix the mismatch between zone and county
    p = dset.parcels
    del p['county_id']
    zone_county=pd.read_csv('C:/urbansim/data/TAZ_County_Table.csv')
    zone_county=zone_county.set_index('zone_id')
    zone_county=zone_county[['county_id']]
    p=pd.merge(p,zone_county, left_on='zone_id', right_index=True)
    pu=p
    #end of fix



    if p.index.name != 'parcel_id':
       p = p.set_index('parcel_id')

    print p[p.zone_id==1725].x

    p['in_denver'] = (p.county_id==8031).astype('int32')
    p['ln_dist_rail'] = p.dist_rail.apply(np.log1p)
    p['ln_dist_bus'] = p.dist_bus.apply(np.log1p)
    p['ln_land_value'] = p.land_value.apply(np.log1p)
    p['land_value_per_sqft'] = p.land_value*1.0/p.parcel_sqft
    p['rail_within_mile'] = (p.dist_rail<5280).astype('int32')
    p['cherry_creek_school_district'] = (p.school_district==8).astype('int32')
    p['acres'] = p.parcel_sqft/43560.0
    p['ln_acres'] = (p.parcel_sqft/43560.0).apply(np.log1p)


    #BUILDING VARIABLES
    b = dset.fetch('buildings',building_sqft_per_job_table=elcm_configuration['building_sqft_per_job_table'],bsqft_job_scaling=elcm_configuration['scaling_factor'])
    b = b[['building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','unit_price_non_residential','unit_price_residential']]
    b.loc[:, 'zone_id'] = p.zone_id[b.parcel_id].values

    bsqft_job = dset.building_sqft_per_job
    #bsqft_job.building_sqft_per_job = bsqft_job.building_sqft_per_job
    b = pd.merge(b,bsqft_job,left_on=['zone_id','building_type_id'],right_index=True,how='left')
    b["non_residential_units"] = b.non_residential_sqft/b.building_sqft_per_job#####
    b["base_year_jobs"] = dset.establishments.groupby('building_id').employees.sum()
    # things get all screwed up if you have overfull buildings
    b["non_residential_units"] = b[["non_residential_units","base_year_jobs"]].max(axis=1)
    b["all_units"] = b.residential_units + b.non_residential_units


    b['county_id'] = p.county_id[b.parcel_id].values
    b['townhome'] = (b.building_type_id==24).astype('int32')
    b['multifamily'] = (np.in1d(b.building_type_id,[2,3])).astype('int32')
    b['office'] = (b.building_type_id==5).astype('int32')
    b['retail_or_restaurant'] = (np.in1d(b.building_type_id,[17,18])).astype('int32')
    b['industrial_building'] = (np.in1d(b.building_type_id,[9,22])).astype('int32')
    b['residential_sqft'] = (b.bldg_sq_ft - b.non_residential_sqft)
    b['btype_hlcm'] = 1*(b.building_type_id==2) + 2*(b.building_type_id==3) + 3*(b.building_type_id==20) + 4*np.invert(np.in1d(b.building_type_id,[2,3,20]))
    b['county8001'] = (b.county_id==8001).astype('int32')
    b['county8005'] = (b.county_id==8005).astype('int32')
    b['county8013'] = (b.county_id==8013).astype('int32')
    b['county8014'] = (b.county_id==8014).astype('int32')
    b['county8019'] = (b.county_id==8019).astype('int32')
    b['county8031'] = (b.county_id==8031).astype('int32')
    b['county8035'] = (b.county_id==8035).astype('int32')
    b['county8039'] = (b.county_id==8039).astype('int32')
    b['county8047'] = (b.county_id==8047).astype('int32')
    b['county8059'] = (b.county_id==8059).astype('int32')
    b['county8123'] = (b.county_id==8123).astype('int32')
    b['unit_price_res_sqft']=b[b.residential_units>0].unit_price_residential/b[b.residential_units>0].bldg_sq_ft
    p['nonres_far'] = (b.groupby('parcel_id').non_residential_sqft.sum()/p.acres).apply(np.log1p)
    p['ln_units_per_acre'] = (b.groupby('parcel_id').residential_units.sum()/p.acres).apply(np.log1p)



    #HOUSEHOLD VARIABLES
    hh_estim = dset.fetch('households_for_estimation')
    hh_estim['tenure'] = 1
    hh_estim.loc[hh_estim.own>1, "tenure"] = 2  # corrected chained index error
    hh_estim['income']=0
    hh_estim.loc[hh_estim.income_group==1, "income"] = 7500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==2, "income"] = 17500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==3, "income"] = 25000  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==4, "income"] = 35000  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==5, "income"] = 45000  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==6, "income"] = 55000  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==7, "income"] = 67500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==8, "income"] = 87500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==9, "income"] = 117500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==10, "income"] = 142500  # corrected chained index error
    hh_estim.loc[hh_estim.income_group==11, "income"] = 200000  # corrected chained index error

    hh = dset.fetch('households')
    for table in [hh_estim, hh]:
        choosers = table
        choosers['zone_id'] = b.zone_id[choosers.building_id].values
        choosers['building_type_id'] = b.building_type_id[choosers.building_id].values
        choosers['county_id'] = b.county_id[choosers.building_id].values
        choosers['btype'] = 1*(choosers.building_type_id==2) + 2*(choosers.building_type_id==3) + 3*(choosers.building_type_id==20) + 4*np.invert(np.in1d(choosers.building_type_id,[2,3,20]))
        choosers['income_3_tenure'] = 1 * (choosers.income < 60000)*(choosers.tenure == 1) + 2 * np.logical_and(choosers.income >= 60000, choosers.income < 120000)*(choosers.tenure == 1) + 3*(choosers.income >= 120000)*(choosers.tenure == 1) + 4*(choosers.income < 40000)*(choosers.tenure == 2) + 5*(choosers.income >= 40000)*(choosers.tenure == 2)
        choosers['younghead'] = choosers.age_of_head<30
        choosers['hh_with_child'] = choosers.children>0
        choosers['ln_income'] = choosers.income.apply(np.log1p)
        choosers['income5xlt'] = choosers.income*5.0
        choosers['income10xlt'] = choosers.income*10.0
        choosers['wkrs_hhs'] = choosers.workers*1.0/choosers.persons

    #ESTABLISHMENT VARIABLES
    e = dset.fetch('establishments')

    e['zone_id'] = b.zone_id[e.building_id].values

    e['county_id'] = b.county_id[e.building_id].values
    e['sector_id_six'] = 1*(e.sector_id==61) + 2*(e.sector_id==71) + 3*np.in1d(e.sector_id,[11,21,22,23,31,32,33,42,48,49]) + 4*np.in1d(e.sector_id,[7221,7222,7224]) + 5*np.in1d(e.sector_id,[44,45,7211,7212,7213,7223]) + 6*np.in1d(e.sector_id,[51,52,53,54,55,56,62,81,92])
    e['sector_id_retail_agg'] = e.sector_id*np.logical_not(np.in1d(e.sector_id,[7211,7212,7213])) + 7211*np.in1d(e.sector_id,[7211,7212,7213])
    e['nonres_sqft'] = b.non_residential_sqft[e.building_id].values


    #ZONE VARIABLES

    #XG: fix the mismatch zone county
    z = dset.fetch('zones')
    del z['county']
    z['zone_id']=z.index
    zone_county=pd.read_csv(os.path.join(misc.data_dir(), 'TAZ_County_Table.csv'))
    zone_county=zone_county.set_index('zone_id')
    zone_county=zone_county[['county']]
    z=pd.merge(z,zone_county, left_on='zone_id', right_index=True)
    del z['zone_id']
    zu=z
    #end of fix


    z['zonal_hh'] = hh.groupby('zone_id').size()
    z['zonal_emp'] = e.groupby('zone_id').employees.sum()

    z['zone_id']=z.index
    print z.columns
    print z[z['zone_id']==1722]['zonal_emp']
    del z['zone_id']

    z['residential_sqft_zone'] = b.groupby('zone_id').residential_sqft.sum()
    z['zonal_pop'] = hh.groupby('zone_id').persons.sum()
    z['residential_units_zone'] = b.groupby('zone_id').residential_units.sum()
    z['ln_residential_units_zone'] = b.groupby('zone_id').residential_units.sum().apply(np.log1p)
    z['ln_residential_unit_density_zone'] = (b.groupby('zone_id').residential_units.sum()/z.acreage).apply(np.log1p)
    z['non_residential_sqft_zone'] = b.groupby('zone_id').non_residential_sqft.sum()
    z['ln_non_residential_sqft_zone'] = b.groupby('zone_id').non_residential_sqft.sum().apply(np.log1p)
    z['percent_sf'] = b[b.btype_hlcm==3].groupby('zone_id').residential_units.sum()*100.0/(b.groupby('zone_id').residential_units.sum())
    z['avg_unit_price_zone'] = b[(b.residential_units>0)*(b.improvement_value>0)].groupby('zone_id').unit_price_residential.mean()
    z['ln_avg_unit_price_zone'] = b[(b.residential_units>0)*(b.improvement_value>0)].groupby('zone_id').unit_price_residential.mean().apply(np.log1p)
    z['ln_avg_nonres_unit_price_zone'] = b[(b.non_residential_sqft>0)*(b.improvement_value>0)].groupby('zone_id').unit_price_non_residential.mean().apply(np.log1p)
    z['median_age_of_head'] = hh.groupby('zone_id').age_of_head.median()
    z['mean_income'] = hh.groupby('zone_id').income.mean()
    z['median_year_built'] = b.groupby('zone_id').year_built.median().astype('int32')
    z['ln_avg_land_value_per_sqft_zone'] = p.groupby('zone_id').land_value_per_sqft.mean().apply(np.log1p)
    z['median_yearbuilt_post_1990'] = (b.groupby('zone_id').year_built.median()>1990).astype('int32')
    z['median_yearbuilt_pre_1950'] = (b.groupby('zone_id').year_built.median()<1950).astype('int32')
    z['percent_hh_with_child'] = hh[hh.children>0].groupby('zone_id').size()*100.0/z.zonal_hh
    z['percent_renter_hh_in_zone'] = hh[hh.tenure==2].groupby('zone_id').size()*100.0/z.zonal_hh
    z['percent_younghead'] = hh[hh.age_of_head<30].groupby('zone_id').size()*100.0/z.zonal_hh
    z['average_resunit_size'] = b.groupby('zone_id').sqft_per_unit.mean()
    z['zone_contains_park'] = (p[p.lu_type_id==14].groupby('zone_id').size()>0).astype('int32')
    z['emp_sector_agg'] = e[e.sector_id==1].groupby('zone_id').employees.sum()
    z['emp_sector1'] = e[e.sector_id_six==1].groupby('zone_id').employees.sum()
    z['emp_sector2'] = e[e.sector_id_six==2].groupby('zone_id').employees.sum()
    z['emp_sector3'] = e[e.sector_id_six==3].groupby('zone_id').employees.sum()
    z['emp_sector4'] = e[e.sector_id_six==4].groupby('zone_id').employees.sum()
    z['emp_sector5'] = e[e.sector_id_six==5].groupby('zone_id').employees.sum()
    z['emp_sector6'] = e[e.sector_id_six==6].groupby('zone_id').employees.sum()
    z['jobs_within_45min'] = dset.compute_range(z.zonal_emp,45.0)
    z['ln_jobs_within_45min'] = dset.compute_range(z.zonal_emp,45.0).apply(np.log1p)
    z['jobs_within_30min'] = dset.compute_range(z.zonal_emp,30.0)
    z['ln_jobs_within_30min'] = dset.compute_range(z.zonal_emp,30.0).apply(np.log1p)
    z['jobs_within_20min'] = dset.compute_range(z.zonal_emp,20.0)
    z['jobs_within_15min'] = dset.compute_range(z.zonal_emp,15.0)
    z['ln_jobs_within_20min'] = dset.compute_range(z.zonal_emp,20.0).apply(np.log1p)
    z['ln_pop_within_20min'] = dset.compute_range(z.zonal_pop,20.0).apply(np.log1p)
    z['ln_emp_aggsector_within_5min'] = dset.compute_range(z.emp_sector_agg,5.0).apply(np.log1p)
    z['ln_emp_sector1_within_15min'] = dset.compute_range(z.emp_sector1,15.0).apply(np.log1p)
    z['ln_emp_sector2_within_15min'] = dset.compute_range(z.emp_sector2,15.0).apply(np.log1p)
    z['ln_emp_sector3_within_10min'] = dset.compute_range(z.emp_sector3,10.0).apply(np.log1p)
    z['ln_emp_sector3_within_15min'] = dset.compute_range(z.emp_sector3,15.0).apply(np.log1p)
    z['ln_emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3,20.0).apply(np.log1p)
    z['ln_emp_sector4_within_15min'] = dset.compute_range(z.emp_sector4,15.0).apply(np.log1p)
    z['ln_emp_sector5_within_15min'] = dset.compute_range(z.emp_sector5,15.0).apply(np.log1p)
    z['ln_emp_sector6_within_15min'] = dset.compute_range(z.emp_sector6,15.0).apply(np.log1p)
    z['allpurpose_agglosum_floor'] = (z.allpurpose_agglosum>=0)*(z.allpurpose_agglosum)

    #Exports (for Tableau-Employment)

    z['emp_sector1_within_20min'] = dset.compute_range(z.emp_sector1,20.0)
    z['emp_sector2_within_20min'] = dset.compute_range(z.emp_sector2,20.0)
    z['emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3,20.0)
    z['emp_sector4_within_20min'] = dset.compute_range(z.emp_sector4,20.0)
    z['emp_sector5_within_20min'] = dset.compute_range(z.emp_sector5,20.0)
    z['emp_sector6_within_20min'] = dset.compute_range(z.emp_sector6,20.0)

    z['emp_sector1_within_30min'] = dset.compute_range(z.emp_sector1,30.0)
    z['emp_sector2_within_30min'] = dset.compute_range(z.emp_sector2,30.0)
    z['emp_sector3_within_30min'] = dset.compute_range(z.emp_sector3,30.0)
    z['emp_sector4_within_30min'] = dset.compute_range(z.emp_sector4,30.0)
    z['emp_sector5_within_30min'] = dset.compute_range(z.emp_sector5,30.0)
    z['emp_sector6_within_30min'] = dset.compute_range(z.emp_sector6,30.0)

    z['emp_sector1_within_45min'] = dset.compute_range(z.emp_sector1,45.0)
    z['emp_sector2_within_45min'] = dset.compute_range(z.emp_sector2,45.0)
    z['emp_sector3_within_45min'] = dset.compute_range(z.emp_sector3,45.0)
    z['emp_sector4_within_45min'] = dset.compute_range(z.emp_sector4,45.0)
    z['emp_sector5_within_45min'] = dset.compute_range(z.emp_sector5,45.0)
    z['emp_sector6_within_45min'] = dset.compute_range(z.emp_sector6,45.0)

    z['residential_unit_per_jobs_within_15_min']= z['residential_units_zone']/z['jobs_within_15min']
    z['residential_sqft_per_jobs_within_15_min']= (b[np.in1d(b['building_type_id'], [2,3,20,24])].groupby('zone_id').bldg_sq_ft.sum())/z['jobs_within_15min']

    ztableau=z[['zonal_emp', 'emp_sector1','emp_sector2', 'emp_sector3', 'emp_sector4', 'emp_sector5', 'emp_sector6'
        , 'jobs_within_45min', 'jobs_within_30min','jobs_within_20min',
         'emp_sector1_within_20min','emp_sector2_within_20min','emp_sector3_within_20min','emp_sector4_within_20min'
        ,'emp_sector5_within_20min','emp_sector6_within_20min' ,
         'emp_sector1_within_30min','emp_sector2_within_30min','emp_sector3_within_30min','emp_sector4_within_30min'
        ,'emp_sector5_within_30min','emp_sector6_within_30min' ,
          'emp_sector1_within_45min','emp_sector2_within_45min','emp_sector3_within_45min','emp_sector4_within_45min'
        ,'emp_sector5_within_45min','emp_sector6_within_45min','residential_unit_per_jobs_within_15_min' ,'residential_sqft_per_jobs_within_15_min']]
    ztableau.to_csv('C:/urbansim/output/emp_tableau.csv')

    ##JOINS
    #merge parcels with zones
    pz = pd.merge(p.reset_index(),z,left_on='zone_id',right_index=True)
    pz = pz.set_index('parcel_id')
    #merge buildings with parcels/zones
    del b['county_id']
    del b['zone_id']
    bpz = pd.merge(b,pz,left_on='parcel_id',right_index=True)
    bpz['residential_units_capacity'] = bpz.parcel_sqft/1500 - bpz.residential_units
    bpz.loc[bpz.residential_units_capacity < 0, "residential_units_capacity"] = 0  # corrected chained index error
    dset.d['buildings'] = bpz
    if dset.parcels.index.name != 'parcel_id':
        dset.parcels = pu

    dset.d['zones']=zu
    #dset.d['parcels']=pu
Example #13
        # print '%d units were filled' %(new_building_count - old_building_count)
        #buildings = buildings.drop(new_homes)
        #temp_count += 1
        if (temp_count > 50):
            break
    #out_table.to_csv('C:/users/jmartinez/documents/households_simulation_test.csv')
    dset.households.loc[out_table.index] = out_table
    #homeless.to_csv('C:/users/jmartinez/documents/homeless.csv')


if __name__ == '__main__':
    from drcog.models import dataset
    from drcog.variables import variable_library
    import os
    import cProfile
    dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5'))

    #Load estimated coefficients
    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs.h5'))
    dset.coeffs = coeff_store.coeffs.copy()
    coeff_store.close()

    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(), 'coeffs_res.h5'))
    dset.coeffs_res = coeff_store.coeffs_res.copy()
    coeff_store.close()

    variable_library.calculate_variables(dset)
    alternatives = dset.buildings[(dset.buildings.residential_units > 0)]
    sim_year = 2011
    fnc = "simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'households',output_names = ('drcog-coeff-hlcm-%s.csv','DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)','hh_location_%s','household_building_ids')," +\
                                         "agents_groupby= ['income_3_tenure',],transition_config = {'Enabled':True,'control_totals_table':'annual_household_control_totals','scaling_factor':1.0}," +\
def simulate(dset,year,depvar = 'building_id',alternatives=None,simulation_table = None,
              output_names=None,agents_groupby = ['income_3_tenure',],transition_config=None,relocation_config=None):


    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (ct["total_number_of_households"]*transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
                hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added,'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    #calculate mortgage payment values

    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05/12
    n = 360
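    # Standard amortization factor: monthly payment = price * r(1+r)^n / ((1+r)^n - 1),
    # with r = .05/12 (5% annual rate, compounded monthly) and n = 360 payments (30 years).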
    buildings['est_mortgage_payment']=buildings.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[rate_field]*.01*relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers,rate_table,rate_field)
        choosers[depvar].ix[movers] = -1

    movers_all = choosers[choosers[depvar]==-1]

    #distribute county_ids based on demography projections

    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),'county_growth_share.csv'), index_col=0 )
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties, movers_all.shape[0], replace=True, p=current_growth_shares)

    movers_all['county_id'] = movers_counties
    empty_units = dset.buildings[(dset.buildings.residential_units>0)].residential_units.sub(choosers.groupby('building_id').size(),fill_value=0)
    empty_units = empty_units[empty_units>0].order(ascending=False)
    alts = alternatives.ix[np.repeat(empty_units.index.values,empty_units.values.astype('int'))]

    #create alternatives subset with mortage info
    r = .05/12
    n = 360

    try:
        subset_alts = alts[['unit_price_residential', 'county_id']]
    except KeyError:
        subset_alts = alts[['unit_price_residential', 'county_id_y']]
        subset_alts.rename(columns={'county_id_y':'county_id'}, inplace=True)

    subset_alts['payment'] = alts.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))


    #generate probabilities
    pdf = gen_probs(dset, movers_all, agents_groupby, alts, output_names)

    #build data structures for loop


    #income_3_tenure limits
    income_limits = {1:60000/12, 2:120000/12, 3:dset.households.income.max()/12, 4:40000/12, 5:dset.households.income.max()/12}
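    # Affordability screen: a unit qualifies for a segment only if its estimated
    # monthly payment is at most 33% of that segment's monthly income ceiling.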

    bool_price1 = (subset_alts.payment / income_limits[1]) <= 0.33
    bool_price2 = (subset_alts.payment / income_limits[2]) <= 0.33
    bool_price3 = (subset_alts.payment / income_limits[3]) <= 0.33
    bool_price4 = (subset_alts.payment / income_limits[4]) <= 0.33
    bool_price5 = (subset_alts.payment / income_limits[5]) <= 0.33
    d = {}


    for county in counties:
        data_list = []
        bool_counties = subset_alts.county_id == int(county)
        ids1 = subset_alts.loc[(bool_counties) & (bool_price1)].index.tolist()
        ids2 = subset_alts.loc[(bool_counties) & (bool_price2)].index.tolist()
        ids3 = subset_alts.loc[(bool_counties) & (bool_price3)].index.tolist()
        ids4 = subset_alts.loc[(bool_counties) & (bool_price4)].index.tolist()
        ids5 = subset_alts.loc[(bool_counties) & (bool_price5)].index.tolist()
        ##generate lists of probabilities
        prob1 = pdf.loc[set(ids1), 'segment1'].tolist()
        prob2 = pdf.loc[set(ids2), 'segment2'].tolist()
        prob3 = pdf.loc[set(ids3), 'segment3'].tolist()
        prob4 = pdf.loc[set(ids4), 'segment4'].tolist()
        prob5 = pdf.loc[set(ids5), 'segment5'].tolist()

        data_list.append((ids1, prob1))
        data_list.append((ids2, prob2))
        data_list.append((ids3, prob3))
        data_list.append((ids4, prob4))
        data_list.append((ids5, prob5))

        d[int(county)] = data_list





    #call placing method

    m_loop = movers_all[['income_3_tenure','county_id','building_id']]
    #m_loop = m_loop.head(5000)
    out_list = []

    from functools import partial
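    # Parallel placement: bind the per-county (ids, probabilities) lookup d to
    # apply_func, split the movers frame into 4 chunks, and map the chunks over a
    # 4-process pool; the per-chunk results are concatenated below.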
    mapfunc = partial(apply_func, d=d, out=out_list)
    p = mp.Pool(processes=4)
    split_dfs = np.array_split(m_loop, 4)
    pool_results = p.map(mapfunc, split_dfs)
    p.close()
    p.join()

    #m_loop.apply(place_households, axis=1, args=(d,out_list))
    master_list = pool_results[0] + pool_results[1] + pool_results[2] + pool_results[3]

    building_ids = [i[0] for i in master_list]
    household_id = [i[1] for i in master_list]

    result_frame = pd.DataFrame(columns=['household_id', 'building_id'])
    result_frame['household_id'] = household_id
    result_frame['building_id'] = building_ids
    #
    dset.households.loc[result_frame.household_id, 'building_id'] = result_frame['building_id'].values
    #
    #result_frame.to_csv('c:/users/jmartinez/documents/test_results.csv')

    #print out_list

    dset.households.loc[result_frame.household_id]
Example #15
def add_rows(data, nrows, starting_index=None):
    """
    Add rows to data table according to a given nrows.
    New rows will have their IDs set to NaN.

    Parameters
    ----------
    data : pandas.DataFrame
    nrows : int
        Number of rows to add.
    starting_index : int, optional
        The starting index from which to calculate indexes for the new
        rows. If not given the max + 1 of the index of `data` will be used.

    Returns
    -------
    updated : pandas.DataFrame
        Table with rows added. New rows will have their index values
        set to NaN.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.

    """
    if nrows == 0:
        return data, _empty_index(), _empty_index()

    if starting_index is None:
        starting_index = data.index.values.max() + 1

    ###added code to alter age distribution per State Demographer's data

    #import migration data
    migration = pd.read_csv(
        os.path.join(misc.data_dir(), 'NetMigrationByAge.csv'))
    #migration = pd.read_csv('C:/users/jmartinez/documents/Age Distribution UrbanSim/NetMigrationByAge.csv')
    migration.columns = ['county', 'age', 'net_migration']
    migration = migration[15:90]  # only use ages that are in the households table
    migration['prob_age'] = migration['net_migration'] / migration.net_migration.sum()  # create weights array

    random_ages = np.random.choice(
        migration.age, nrows,
        p=migration.prob_age)  # randomly choose ages with a weighted pdf

    frame = pd.DataFrame()
    frame['ages'] = random_ages
    grp = frame.groupby('ages').size()  # count how many rows of each age were drawn in the random choice above

    agg_list = []
    for i in grp.iteritems():
        age_val = i[0]
        age_count = i[1]
        array = np.random.choice(
            data[data.age_of_head == age_val].index.values, age_count)
        for j in array:
            agg_list.append(j)

    #####original code
    #i_to_copy = np.random.choice(data.index.values, nrows)   ###randomly chooses household index to copy -- could make it better by assigning a distribution to weight picks based on likely new household characteristics
    new_rows = data.loc[agg_list].copy()  # creates new dataframe of copied households
    added_index = pd.Index(np.arange(starting_index, starting_index + nrows, dtype=np.int))
    new_rows.index = added_index  # correctly assigns index

    ###temporarily export for analysis
    #new_rows.to_csv('C:/users/jmartinez/documents/Age Distribution UrbanSim/households_newdist.csv')

    return pd.concat([data, new_rows]), added_index, pd.Index(agg_list)
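
A minimal usage sketch for add_rows, assuming NetMigrationByAge.csv exists under
misc.data_dir() and that `households` is a full households table whose age_of_head
column covers the sampled 15-90 age range (the table name is hypothetical):

    households = dset.fetch('households')
    updated, added, copied = add_rows(households, nrows=100)
    print len(added)   # 100 new index values, starting at households.index.max() + 1
    print len(copied)  # index values of the donor rows that were duplicated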
Example #16
 def fetch_csv(self,name,**kwargs):
   if name in self.d: return self.d[name]
   tbl = pd.read_csv(os.path.join(misc.data_dir(),name),**kwargs) 
   self.d[name] = tbl
   return tbl
Example #17
    def run(self, name=None, export_buildings_to_urbancanvas=False, base_year=2010, forecast_year=None, fixed_seed=True, random_seed=1, export_indicators=True, indicator_output_directory='C:/opus/data/drcog2/runs', core_components_to_run=None, household_transition=None,household_relocation=None,employment_transition=None, elcm_configuration=None, developer_configuration=None, calibration_configuration=None, hh_targets=None, ru_targets=None,  emp_targets=None, nrsqft_targets=None):
        """Runs an UrbanSim2 scenario 
        """
        
        ##Calibration targets
        #resunit_targets = np.array([.198,.205,.105,.032,.002,.165,.142,.014,.002,.099,.037])
        #hh_targets = np.array([.198,.205,.105,.032,.002,.165,.142,.014,.002,.099,.037])
        #emp_targets = np.array([0.1511,0.2232,0.0737,0.0473,0.0001,0.2435,0.1094,0.0139,0.0005,0.1178,0.0197])
        #nonres_targets = np.array([0.1511,0.2232,0.0737,0.0473,0.0001,0.2435,0.1094,0.0139,0.0005,0.1178,0.0197])
        hh_targets = np.array([hh_targets['hh_8001_target'],hh_targets['hh_8005_target'],hh_targets['hh_8013_target'],hh_targets['hh_8014_target'],hh_targets['hh_8019_target'],hh_targets['hh_8031_target'],hh_targets['hh_8035_target'],hh_targets['hh_8039_target'],hh_targets['hh_8047_target'],hh_targets['hh_8059_target'],hh_targets['hh_8123_target']])
        resunit_targets = np.array([ru_targets['ru_8001_target'],ru_targets['ru_8005_target'],ru_targets['ru_8013_target'],ru_targets['ru_8014_target'],ru_targets['ru_8019_target'],ru_targets['ru_8031_target'],ru_targets['ru_8035_target'],ru_targets['ru_8039_target'],ru_targets['ru_8047_target'],ru_targets['ru_8059_target'],ru_targets['ru_8123_target']])
        emp_targets = np.array([emp_targets['emp_8001_target'],emp_targets['emp_8005_target'],emp_targets['emp_8013_target'],emp_targets['emp_8014_target'],emp_targets['emp_8019_target'],emp_targets['emp_8031_target'],emp_targets['emp_8035_target'],emp_targets['emp_8039_target'],emp_targets['emp_8047_target'],emp_targets['emp_8059_target'],emp_targets['emp_8123_target']])
        nonres_targets = np.array([nrsqft_targets['nr_8001_target'],nrsqft_targets['nr_8005_target'],nrsqft_targets['nr_8013_target'],nrsqft_targets['nr_8014_target'],nrsqft_targets['nr_8019_target'],nrsqft_targets['nr_8031_target'],nrsqft_targets['nr_8035_target'],nrsqft_targets['nr_8039_target'],nrsqft_targets['nr_8047_target'],nrsqft_targets['nr_8059_target'],nrsqft_targets['nr_8123_target']])
        county_id = np.array([8001,8005,8013,8014,8019,8031,8035,8039,8047,8059,8123])
        targets = pd.DataFrame({'county_id':county_id,'resunit_target':resunit_targets,'hh_target':hh_targets,'emp_target':emp_targets,'nonres_target':nonres_targets})
        delta = calibration_configuration['coefficient_step_size']
        margin = calibration_configuration['match_target_within']
        iterations = calibration_configuration['iterations']
        
        for it in range(iterations):
            logger.log_status('Calibration iteration: ' + str(it))
            
            
            logger.log_status('Starting UrbanSim2 run.')
            dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
            seconds_start = time.time()
            if fixed_seed:
                logger.log_status('Running with fixed random seed.')
                np.random.seed(random_seed)
                
            #Load estimated coefficients
            coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5'))
            dset.coeffs = coeff_store.coeffs.copy()
            coeff_store.close()

            coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs_res.h5'))
            dset.coeffs_res = coeff_store.coeffs_res.copy()
            coeff_store.close()
            
            #Keep track of unplaced agents by year
            unplaced_hh = []
            unplaced_emp = []
            
            for sim_year in range(base_year,forecast_year+1):
                print 'Simulating year ' + str(sim_year)
                logger.log_status(sim_year)

                ##Variable Library calculations
                variable_library.calculate_variables(dset)
                
                #Record pre-demand model zone-level household/job totals
                hh_zone1 = dset.fetch('households').groupby('zone_id').size()
                emp_zone1 = dset.fetch('establishments').groupby('zone_id').employees.sum()
                        
                ############     ELCM SIMULATION
                if core_components_to_run['ELCM']:
                    logger.log_status('ELCM simulation.')
                    alternatives = dset.buildings[(dset.buildings.non_residential_sqft>0)]
                    elcm_simulation.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'establishments',output_names = ("drcog-coeff-elcm-%s.csv","DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)","emp_location_%s","establishment_building_ids"),
                                             agents_groupby= ['sector_id_retail_agg',],transition_config = {'Enabled':True,'control_totals_table':'annual_employment_control_totals','scaling_factor':1.0})
                        
                #################     HLCM simulation
                if core_components_to_run['HLCM']:
                    logger.log_status('HLCM simulation.')
                    alternatives = dset.buildings[(dset.buildings.residential_units>0)]
                    new_hlcm_simulation.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'households',output_names = ("drcog-coeff-hlcm-%s.csv","DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)","hh_location_%s","household_building_ids"),
                                             agents_groupby= ['income_3_tenure',],transition_config = {'Enabled':True,'control_totals_table':'annual_household_control_totals','scaling_factor':1.0},
                                             relocation_config = {'Enabled':True,'relocation_rates_table':'annual_household_relocation_rates','scaling_factor':1.0},)

                ############     REPM SIMULATION
                if core_components_to_run['Price']:
                    logger.log_status('REPM simulation.')
                    #Residential
                    census_model_simulation.simulate_residential(dset, 'unit_price_res_sqft', 'school_district_id', 10, sim_year)

                    #Non-residential                                    
                    regression_model_simulation.simulate(dset, year=sim_year,output_varname='unit_price_non_residential', simulation_table='buildings', output_names = ["drcog-coeff-nrhedonic-%s.csv","DRCOG NRHEDONIC MODEL (%s)","nrprice_%s"],
                                                         agents_groupby = 'building_type_id', segment_ids = [5,8,11,16,17,18,21,23,9,22])
                    
                ############     DEVELOPER SIMULATION
                if core_components_to_run['Developer']:
                    logger.log_status('Proforma simulation.')
                    buildings, newbuildings = proforma_developer_model.run(dset,hh_zone1,emp_zone1,developer_configuration,sim_year)
                    dset.d['buildings'] = pd.concat([buildings,newbuildings])

                ###########   Indicators
                # if export_indicators:
                    # unplaced_hh.append((dset.households.building_id==-1).sum())
                    # unplaced_emp.append(dset.establishments[dset.establishments.building_id==-1].employees.sum())
                    # if sim_year == forecast_year:
                        # logger.log_status('Exporting indicators')
                        # indicators.run(dset, indicator_output_directory, forecast_year)
                        
                ###########     TRAVEL MODEL
                    # if travel_model_configuration['export_to_tm']:
                        # if sim_year in travel_model_configuration['years_to_run']:
                            # logger.log_status('Exporting to TM')
                            # export_zonal_file.export_zonal_file_to_tm(dset,sim_year,tm_input_dir=travel_model_configuration['tm_input_dir'])
                    
            elapsed = time.time() - seconds_start
            print "TOTAL elapsed time: " + str(elapsed) + " seconds."
            
            
            ###########   Calibration
            logger.log_status('Calibration coefficient updating')
            import math
            hh_submodels = []
            for col in dset.coeffs.columns:
                if col[0].startswith('hh_') and col[1]=='fnames':
                    hh_submodels.append(col[0])
            emp_submodels = []
            for col in dset.coeffs.columns:
                if col[0].startswith('emp_') and col[1]=='fnames':
                    emp_submodels.append(col[0])

            #Record base values for temporal comparison
            hh = dset.store.households
            e = dset.store.establishments
            b = dset.store.buildings
            p = dset.store.parcels.set_index('parcel_id')
            if p.index.name != 'parcel_id':
                p=p.set_index(p['parcel_id'])

            b['county_id'] = p.county_id[b.parcel_id].values
            hh['county_id'] = b.county_id[hh.building_id].values
            e['county_id'] = b.county_id[e.building_id].values
            base_hh_county = hh.groupby('county_id').size()
            base_emp_county = e.groupby('county_id').employees.sum()
            base_ru_county = b.groupby('county_id').residential_units.sum()
            base_nr_county = b.groupby('county_id').non_residential_sqft.sum()
            
            #Calibration indicators
            b = dset.fetch('buildings')
            e = dset.fetch('establishments')
            hh = dset.fetch('households')
            p = dset.parcels
            if p.index.name != 'parcel_id':
                p = p.set_index(p['parcel_id'])
            b['county_id'] = p.county_id[b.parcel_id].values
            hh['county_id'] = b.county_id[hh.building_id].values
            e['county_id'] = b.county_id[e.building_id].values
            sim_hh_county = hh.groupby('county_id').size()
            sim_emp_county = e.groupby('county_id').employees.sum()
            sim_ru_county = b.groupby('county_id').residential_units.sum()
            sim_nr_county = b.groupby('county_id').non_residential_sqft.sum()
            hh_diff_county = sim_hh_county - base_hh_county
            emp_diff_county = sim_emp_county - base_emp_county
            ru_diff_county = sim_ru_county - base_ru_county
            nr_diff_county = sim_nr_county - base_nr_county

            prop_growth_emp = emp_diff_county*1.0/emp_diff_county.sum()
            prop_growth_hh = hh_diff_county*1.0/hh_diff_county.sum()
            prop_growth_ru = ru_diff_county*1.0/ru_diff_county.sum()
            prop_growth_nr = nr_diff_county*1.0/nr_diff_county.sum()
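            # A minimal sketch (hypothetical helper, not part of the model) of the
            # nudge rule the per-county loop below applies: outside target +/- margin,
            # step the relevant factor or coefficient by a fixed increment toward the
            # target, treating a NaN growth share like "below target".
            # def nudge(value, prop, target, margin, step):
            #     if math.isnan(prop) or prop < target - margin:
            #         return value + step
            #     if prop > target + margin:
            #         return value - step
            #     return value  # within tolerance: no action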
            
            county_args = pd.read_csv(os.path.join(misc.data_dir(),'county_calib.csv')).set_index('county_id')
            
            i = j = k = m = 0  # counts of ru, hh, emp, and nonres county targets met
            for x in targets.county_id.values:
                cid = int(x)
                print cid
                prop_ru = prop_growth_ru[cid]
                prop_hh = prop_growth_hh[cid]
                prop_emp = prop_growth_emp[cid]
                prop_nonres = prop_growth_nr[cid]
                print 'ru prop is ' + str(prop_ru)
                print 'nsqft prop is ' + str(prop_nonres)
                print 'hh prop is ' + str(prop_hh)
                print 'emp prop is ' + str(prop_emp)
                logger.log_status('ru prop is ' + str(prop_ru))
                logger.log_status('nsqft prop is ' + str(prop_nonres))
                logger.log_status('hh prop is ' + str(prop_hh))
                logger.log_status('emp prop is ' + str(prop_emp))
                target_ru = targets.resunit_target[targets.county_id==cid].values[0]
                target_hh = targets.hh_target[targets.county_id==cid].values[0]
                target_emp = targets.emp_target[targets.county_id==cid].values[0]
                target_nonres = targets.nonres_target[targets.county_id==cid].values[0]
                print 'ru target is ' + str(target_ru)
                print 'nsqft target is ' + str(target_nonres)
                print 'hh target is ' + str(target_hh)
                print 'emp target is ' + str(target_emp)
                logger.log_status('ru target is ' + str(target_ru))
                logger.log_status('nsqft target is ' + str(target_nonres))
                logger.log_status('hh target is ' + str(target_hh))
                logger.log_status('emp target is ' + str(target_emp))
                
                varname = 'county%s' % (cid)
                print varname
                if (prop_ru > (target_ru - margin)) and (prop_ru < (target_ru + margin)):
                    print 'NO ru action.'
                    logger.log_status('NO ru action.')
                    i = i + 1
                elif math.isnan(prop_ru) or (prop_ru < target_ru):
                    county_args.loc[cid, 'chh_demand_factor'] = float(county_args.chh_demand_factor[cid]) + 0.01  # corrected chained index error
                    county_args.loc[cid, 'cres_price_factor'] = float(county_args.cres_price_factor[cid]) + 0.01
                    print 'ru action is PLUS'
                    logger.log_status('ru action is PLUS')
                elif prop_ru > target_ru:
                    county_args.loc[cid, 'chh_demand_factor'] = float(county_args.chh_demand_factor[cid]) - 0.01
                    county_args.loc[cid, 'cres_price_factor'] = float(county_args.cres_price_factor[cid]) - 0.01
                    print 'ru action is MINUS'
                    logger.log_status('ru action is MINUS')
                    
                if (prop_hh > (target_hh - margin)) and (prop_hh < (target_hh + margin)):
                    print 'NO hh action.'
                    logger.log_status('NO hh action.')
                    j = j + 1
                elif math.isnan(prop_hh) or (prop_hh < target_hh):
                    for submodel in hh_submodels:
                        mask = dset.coeffs[(submodel,'fnames')]==varname
                        dset.coeffs.loc[mask,(submodel,'coeffs')] += delta  # corrected chained index error
                    print 'hh action is PLUS'
                    logger.log_status('hh action is PLUS')
                elif prop_hh > target_hh:
                    for submodel in hh_submodels:
                        mask = dset.coeffs[(submodel,'fnames')]==varname
                        dset.coeffs.loc[mask,(submodel,'coeffs')] -= delta  # corrected chained index error
                    print 'hh action is MINUS'
                    logger.log_status('hh action is MINUS')
                    
                if (prop_emp > (target_emp - margin)) and (prop_emp < (target_emp + margin)):
                    print 'NO emp action.'
                    logger.log_status('NO emp action.')
                    k = k + 1
                elif math.isnan(prop_emp) or (prop_emp < target_emp):
                    for submodel in emp_submodels:
                        mask = dset.coeffs[(submodel,'fnames')]==varname
                        dset.coeffs.loc[mask,(submodel,'coeffs')] += delta  # corrected chained index error
                    print 'emp action is PLUS'
                    logger.log_status('emp action is PLUS')
                elif prop_emp > target_emp:
                    for submodel in emp_submodels:
                        mask = dset.coeffs[(submodel,'fnames')]==varname
                        dset.coeffs.loc[mask,(submodel,'coeffs')] -= delta  # corrected chained index error
                    print 'emp action is MINUS'
                    logger.log_status('emp action is MINUS')
                    
                if (prop_nonres > (target_nonres - margin)) and (prop_nonres < (target_nonres + margin)):
                    print 'NO nonres action.'
                    logger.log_status('NO nonres action.')
                    m = m + 1
                elif math.isnan(prop_nonres) or (prop_nonres < target_nonres):
                    county_args.loc[cid, 'cemp_demand_factor'] = float(county_args.cemp_demand_factor[cid]) + 0.01  # corrected chained index error
                    county_args.loc[cid, 'cnonres_price_factor'] = float(county_args.cnonres_price_factor[cid]) + 0.01
                    print county_args.cnonres_price_factor[cid]
                    print 'nonres action is PLUS'
                    logger.log_status('nonres action is PLUS')
                elif prop_nonres > target_nonres:
                    county_args.loc[cid, 'cemp_demand_factor'] = float(county_args.cemp_demand_factor[cid]) - 0.01
                    county_args.loc[cid, 'cnonres_price_factor'] = float(county_args.cnonres_price_factor[cid]) - 0.01
                    print 'nonres action is MINUS'
                    print county_args.cnonres_price_factor[cid]
                    logger.log_status('nonres action is MINUS')
                    
            print i,j,k,m
            logger.log_status('Number of hh county targets met: %s' % j)
            logger.log_status('Number of emp county targets met: %s' % k)
            logger.log_status('Number of ru county targets met: %s' % i)
            logger.log_status('Number of nr county targets met: %s' % m)
            ###Save calibrated coefficients at the end of each iteration
            coeff_store_path = os.path.join(misc.data_dir(),'coeffs.h5')
            coeff_store = pd.HDFStore(coeff_store_path)
            coeff_store['coeffs'] = dset.coeffs
            coeff_store.close()
            county_args.to_csv(os.path.join(misc.data_dir(),'county_calib.csv'))
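            # Round-trip sketch: the coefficients saved here are what a subsequent
            # run reloads (see the coeffs.h5 load step in Example #21), e.g.:
            # reloaded = pd.HDFStore(coeff_store_path)
            # assert reloaded['coeffs'].shape == dset.coeffs.shape
            # reloaded.close()
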
def run(dset,hh_zone1,emp_zone1,developer_configuration,sim_year):
    #Record post-demand-model change in zone-level household/job totals
    hh = dset.fetch('households')
    e = dset.fetch('establishments')
    buildings = dset.fetch('buildings')
    parcels = dset.parcels
    if parcels.index.name != 'parcel_id':
        parcels = parcels.set_index(parcels['parcel_id'])
    buildings['zone_id'] = parcels.zone_id[buildings.parcel_id].values

    #e['zone_id'] = buildings.zone_id[e.building_id].values
    hh['zone_id'] = buildings.zone_id[hh.building_id].values
    hh_zone2 = hh.groupby('zone_id').size()
    emp_zone2 = e.groupby('zone_id').employees.sum()
    zdiff = pd.DataFrame(index=dset.zones.index)
    zdiff['hh_zone1'] = hh_zone1
    zdiff['hh_zone2'] = hh_zone2
    zdiff['emp_zone1'] = emp_zone1
    zdiff['emp_zone2'] = emp_zone2
    zdiff = zdiff.fillna(0)
    zdiff.hh_zone2 = zdiff.hh_zone2+5
    zdiff.emp_zone2 = zdiff.emp_zone2+5
    hh_zone_diff = (zdiff.hh_zone2 - zdiff.hh_zone1)
    emp_zone_diff = (zdiff.emp_zone2 - zdiff.emp_zone1)



    #####Get the user inputted zone args
    if developer_configuration['zonal_levers']:
        zone_args = pd.read_csv(os.path.join(misc.data_dir(),'devmodal_zone_args.csv')).set_index('zone_id')
        ##Getting county_id into zone_args.  Eventually, lets move the dset.zones operations to the varlib
        dset.zones['county_id'] = 0
        dset.zones.loc[dset.zones.county == 'Adams', "county_id"] = 8001  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Arapahoe', "county_id"] = 8005  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Boulder', "county_id"] = 8013  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Broomfield', "county_id"] = 8014  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Clear Creek', "county_id"] = 8019  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Denver', "county_id"] = 8031  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Douglas', "county_id"] = 8035  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Elbert', "county_id"] = 8039  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Gilpin', "county_id"] = 8047  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Jefferson', "county_id"] = 8059  # corrected chained index error
        dset.zones.loc[dset.zones.county == 'Weld', "county_id"] = 8123  # corrected chained index error
        zone_args['cid'] = dset.zones.county_id
        pd.set_option('display.max_rows', 1000)

        ##Loading/applying county calib factors to scale the zone args
        county_args = pd.read_csv(os.path.join(misc.data_dir(),'county_calib.csv')).set_index('county_id')
        zone_args['county_id']=zone_args['cid']
        zone_args = pd.merge(zone_args,county_args,left_on='county_id',right_index=True)

        zone_args.res_price_factor = zone_args.res_price_factor*zone_args.cres_price_factor
        zone_args.nonres_price_factor = zone_args.nonres_price_factor*zone_args.cnonres_price_factor
        zone_args.cost_factor = zone_args.cost_factor*zone_args.ccost_factor
        emp_zone_diff = emp_zone_diff*zone_args.cemp_demand_factor
        hh_zone_diff = hh_zone_diff*zone_args.chh_demand_factor
    else:
        zone_args = None
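    # A more compact equivalent of the per-county assignments above (sketch; same
    # FIPS codes, county_fips is a hypothetical name):
    # county_fips = {'Adams': 8001, 'Arapahoe': 8005, 'Boulder': 8013,
    #                'Broomfield': 8014, 'Clear Creek': 8019, 'Denver': 8031,
    #                'Douglas': 8035, 'Elbert': 8039, 'Gilpin': 8047,
    #                'Jefferson': 8059, 'Weld': 8123}
    # dset.zones['county_id'] = dset.zones.county.map(county_fips).fillna(0).astype(int)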

    # ##########################################
    # #### Getting possible rents by use here ##
    # ##########################################
    buildings = buildings[['building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','unit_price_non_residential','unit_price_residential','building_sqft_per_job','non_residential_units','base_year_jobs','all_units', 'unit_price_res_sqft']]
    buildings.loc[:, "zone_id"] = parcels.zone_id[buildings.parcel_id].values  #  corrected chain index error

    res_buildings = buildings[buildings.unit_price_residential>0]
    nonres_buildings = buildings[buildings.unit_price_non_residential>0]
    nonres_buildings_office = nonres_buildings[nonres_buildings.building_type_id==5]
    nonres_buildings_retail = nonres_buildings[np.in1d(nonres_buildings.building_type_id,[17,18])]
    nonres_buildings_industrial = nonres_buildings[np.in1d(nonres_buildings.building_type_id,[9,22])]

    # Price now are in price/sqft
    #### XG: define residential price only on types 2,3, 20, 24 and non-residential 5, 9, 17,18,22
    zone_args['zone_id']=zone_args.index  # NOTE: this block assumes zonal_levers is enabled, i.e. zone_args is not None
    res_buildings.loc[:, "resprice_sqft"] = res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].unit_price_res_sqft  # corrected chain index error
    zonal_resprice_sqft = pd.DataFrame(res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].groupby('zone_id').resprice_sqft.mean())
    zonal_resprice_sqft.columns=['resrent']
    zone_args=pd.merge(zone_args,zonal_resprice_sqft, left_on='zone_id', right_index=True, how='outer')
    zonal_nonresprice_office = pd.DataFrame(nonres_buildings_office[nonres_buildings_office.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_nonresprice_office.columns=['nonresrent_office']
    zone_args=pd.merge(zone_args,zonal_nonresprice_office, left_on='zone_id', right_index=True, how='outer')
    zonal_nonresprice_retail = pd.DataFrame(nonres_buildings_retail[ nonres_buildings_retail.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_nonresprice_retail.columns=['nonresrent_retail']
    zone_args=pd.merge(zone_args,zonal_nonresprice_retail, left_on='zone_id', right_index=True, how='outer')
    zonal_nonresprice_industrial = pd.DataFrame(nonres_buildings_industrial[ nonres_buildings_industrial.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_nonresprice_industrial.columns=['nonresrent_industrial']
    zone_args=pd.merge(zone_args,zonal_nonresprice_industrial, left_on='zone_id', right_index=True, how='outer')
    zone_args['resrent']=zone_args['resrent']*zone_args.res_price_factor
    zone_args['nonresrent_office']=zone_args['nonresrent_office']* zone_args.nonres_price_factor
    zone_args['nonresrent_retail']=zone_args['nonresrent_retail']* zone_args.nonres_price_factor
    zone_args['nonresrent_industrial']=zone_args['nonresrent_industrial']* zone_args.nonres_price_factor
    zonal_avg_rents= zone_args[['resrent', 'nonresrent_office', 'nonresrent_retail','nonresrent_industrial','cost_factor','allowable_density_factor']]
    zonal_avg_rents.loc[:, "zone_id"] = zonal_avg_rents.index  #  corrected chain index error
    zonal_avg_rents.loc[:, 'county_id'] = dset.zones.county_id[zonal_avg_rents['zone_id']].values  # corrected chain index error
    pd.set_option('display.max_rows', len(dset.zones.index))
    del  zonal_avg_rents['county_id']
    del zonal_avg_rents['zone_id']

    """
    res_buildings['resprice_sqft'] = res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].unit_price_res_sqft
    zonal_resprice_sqft = pd.DataFrame(res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].groupby('zone_id').resprice_sqft.mean())
    zonal_nonresprice_office = pd.DataFrame(nonres_buildings_office[nonres_buildings_office.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_avg_rents=pd.join(zonal_resprice_sqft, zonal_nonresprice_office, how='outer')
    print  zonal_avg_rents
    sys.exit('beurk')
    zonal_nonresprice_retail = pd.DataFrame(nonres_buildings_retail[ nonres_buildings_retail.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_avg_rents=pd.join( zonal_nonresprice_retail, zonal_avg_rents, how='outer')
    zonal_nonresprice_industrial = nonres_buildings_industrial[ nonres_buildings_industrial.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()
    zonal_resrent = zonal_resprice_sqft
    zonal_nonresrent_office = zonal_nonresprice_office
    zonal_nonresrent_retail = zonal_nonresprice_retail
    zonal_nonresrent_industrial = zonal_nonresprice_industrial

    if zone_args is not None:  #####Make sure no nulls in the prices either...
        zonal_resrent = zonal_resrent * zone_args.res_price_factor
        print zonal_resrent
        zonal_nonresrent_office = zonal_nonresprice_office * zone_args.nonres_price_factor
        zonal_nonresrent_retail = zonal_nonresprice_retail * zone_args.nonres_price_factor
        zonal_nonresrent_industrial = zonal_nonresprice_industrial * zone_args.nonres_price_factor
        zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial,'cost_factor':zone_args.cost_factor,'allowable_density_factor':zone_args.allowable_density_factor}, index=zonal_resrent.index)
    else:
        zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial})
    zonal_avg_rents['zone_id']=zonal_avg_rents.index
    zonal_avg_rents['county_id']=dset.zones.county_id[zonal_avg_rents['zone_id']].values
    pd.set_option('display.max_rows', len(dset.zones.index))
    print zonal_avg_rents[ zonal_avg_rents['county_id']==8123].zone_id
    del  zonal_avg_rents['county_id']
    del zonal_avg_rents['zone_id']
    """
    avgrents = pd.merge(parcels,zonal_avg_rents,left_on='zone_id',right_index=True,how='left')
    avgrents['residential'] = avgrents.resrent
    avgrents['office'] = avgrents.nonresrent_office
    avgrents['retail'] = avgrents.nonresrent_retail
    avgrents['industrial'] = avgrents.nonresrent_industrial

    if zone_args is not None:
        avgrents = avgrents[['residential','office','retail','industrial','cost_factor','allowable_density_factor', 'county_id']]
    else:
        avgrents = avgrents[['residential','office','retail','industrial']]
    avgrents = avgrents.fillna(.1)

    #avgrents.residential[np.isinf(avgrents.residential)] = .2
    avgrents.loc[avgrents.residential < .2, "residential"] = .2  # corrected chain index error
    avgrents.loc[avgrents.office < 1, "office"] = 1  # corrected chain index error
    avgrents.loc[avgrents.retail < 1, "retail"] = 1  # corrected chain index error
    avgrents.loc[avgrents.industrial < 1, "industrial"] = 1  # corrected chain index error
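    # Equivalent floors using pandas clip (same thresholds as the .loc assignments
    # above; shown for reference only):
    # avgrents['residential'] = avgrents['residential'].clip(lower=.2)
    # for use in ['office', 'retail', 'industrial']:
    #     avgrents[use] = avgrents[use].clip(lower=1)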

    ####################GET PARCEL LEVEL ATTRIBUTES
    #### XG: retain old square footage as it is used to compute average
    buildings.loc[:, 'bldg_sq_ft2'] = buildings['bldg_sq_ft']  # corrected chain index error
    buildings.loc[:, 'bldg_sq_ft'] = buildings.non_residential_sqft + buildings.residential_units*buildings.sqft_per_unit  # corrected chain index error
    #buildings['impval'] = buildings.non_residential_sqft*buildings.unit_price_non_residential + buildings.residential_units*buildings.unit_price_residential
    buildings.loc[:, 'impval'] = 0  # corrected chain index error
    buildings.loc[buildings.residential_units*buildings.unit_price_residential>0,'impval'] = buildings.residential_units*buildings.unit_price_residential
    buildings.loc[buildings.non_residential_sqft*buildings.unit_price_non_residential >0,'impval']=buildings['impval']+ buildings.non_residential_sqft*buildings.unit_price_non_residential
    far_predictions = pd.DataFrame(index=parcels.index)
    #far_predictions['current_yearly_rent_buildings'] = buildings.groupby('parcel_id').impval.sum()/17.9
    far_predictions['current_yearly_rent_buildings'] = buildings.groupby('parcel_id').impval.sum()
    far_predictions['current_yearly_rent_buildings'] = far_predictions.current_yearly_rent_buildings.fillna(0)
    far_predictions.current_yearly_rent_buildings = far_predictions.current_yearly_rent_buildings * developer_configuration['land_property_acquisition_cost_factor']
    far_predictions['county_id']=parcels.county_id[far_predictions.index].values
    print  far_predictions[far_predictions['current_yearly_rent_buildings']>0].groupby('county_id').current_yearly_rent_buildings.mean()


    if zone_args is not None:
        #far_predictions.current_yearly_rent_buildings = avgrents.cost_factor*far_predictions.current_yearly_rent_buildings ##Cost scaling happens here
        pass  # cost-factor scaling is currently disabled; acquisition costs pass through unscaled
    far_predictions['parcelsize'] = parcels.parcel_sqft



    ###PROFORMA SURFACE CALCULATIONS AND LOOKUPS (TO ARRIVE AT UNCONSTRAINED FARS BY USE)
    # do the lookup in the developer model - this is where the profitability is computed

    dev = spotproforma.Developer(profit_factor=developer_configuration['profit_factor'])
    for form in spotproforma.forms.keys():
        far_predictions[form+'_feasiblefar'], far_predictions[form+'_profit'] = \
                dev.lookup(form,avgrents[spotproforma.uses].values,far_predictions.current_yearly_rent_buildings,far_predictions.parcelsize)


    # we now have a far prediction per parcel by allowable building type!
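    # e.g., to inspect one form's feasibility surface (sketch; assumes 'residential'
    # is among spotproforma.forms):
    # print far_predictions[['residential_feasiblefar','residential_profit']].describe()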

    #################DEVCONSTRAINTS:  Obtain zoning and other development constraints #####
    zoning = dset.fetch('zoning')
    fars = dset.fetch('fars')
    max_parcel_sqft = 200000
    max_far_field = developer_configuration['max_allowable_far_field_name']
    if max_far_field not in parcels.columns:
        parcels = pd.merge(parcels,fars,left_on='far_id',right_index=True)
        if developer_configuration['enforce_environmental_constraints']:
            parcels[max_far_field] = parcels[max_far_field]*(1 - parcels.prop_constrained) #Adjust allowable FAR to account for undevelopable proportion of parcel land
        if developer_configuration['enforce_ugb']:
            parcels.loc[parcels.in_ugb==0, max_far_field] = parcels[max_far_field][parcels.in_ugb==0] * developer_configuration['outside_ugb_allowable_density']
        if developer_configuration['uga_policies']:
            parcels.loc[parcels.in_uga==1, max_far_field] = parcels[max_far_field][parcels.in_uga==1] * developer_configuration['inside_uga_allowable_density']  # corrected: both sides now select on in_uga
        parcels.loc[parcels.parcel_sqft < developer_configuration['min_lot_sqft'], max_far_field] = 0  # corrected: max_far_field is a variable, not a literal column name
        parcels.loc[parcels.parcel_sqft > max_parcel_sqft, max_far_field] = 0
    if 'type1' not in parcels.columns:
        parcels = pd.merge(parcels,zoning,left_on='zoning_id',right_index=True)
    ##Scale allowable FARs here if needed
    if zone_args is not None:
        parcels[max_far_field] = parcels[max_far_field]*avgrents.allowable_density_factor

    ####### BUILDING TYPE DICTIONARY #####
    type_d = { 
    'residential': [2,3,20,24],
    'industrial': [9,22],
    'retail': [17,18],
    'office': [5],
    }

    ###MERGE ALLOWABLE DENSITY BY USE WITH FEASIBLE DENSITY BY USE (TAKE MINIMUM) TO ARRIVE AT A PARCEL PREDICTION
    # we have zoning by like 16+ building types and rents/far predictions by 4 more aggregate building types
    # so we have to convert one into the other
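    # Reverse lookup sketch (btype_to_form is a hypothetical helper) derived from
    # type_d above: detailed building_type_id -> aggregate form, e.g. 17 -> 'retail':
    # btype_to_form = dict((bt, form) for form, bts in type_d.iteritems() for bt in bts)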
    parcel_predictions = pd.DataFrame(index=parcels.index)

    parcel_predictions['county_id']=parcels.county_id

    for typ, btypes in type_d.iteritems():
        for btype in btypes:
            # three questions - 1) is type allowed 2) what FAR is allowed 3) is it supported by rents
            if developer_configuration['enforce_allowable_use_constraints']:
                tmp = parcels[parcels['type%d'%btype]==1][[max_far_field]] # is type allowed
                far_predictions['type%d_zonedfar'%btype] = tmp[max_far_field] # at what far
            else:
                tmp = parcels[[max_far_field]] # corrected: tmp was undefined on this branch
                far_predictions['type%d_zonedfar'%btype] = parcels[max_far_field]
            # merge zoning with feasibility
            tmp.index.name='parcel_id'
            tmp = pd.merge(tmp,far_predictions[[typ+'_feasiblefar']],left_index=True,right_index=True,how='left').set_index(tmp.index)
            # min of zoning and feasibility
            parcel_predictions[btype] = pd.Series(np.minimum(tmp[max_far_field],tmp[typ+'_feasiblefar']),index=tmp.index)
            #avgrents2=avgrents.ix[parcels['type%d'%btype]==1]
            #profit=dev.profit(typ,avgrent2s[spotproforma.uses].as_matrix(),far_predictions.current_yearly_rent_buildings,parcel_prediction[btype])

            #print profit
            #parcel_predictions[btype+'_profit']=pd.Series(profit,index=tmp.index)
    parcel_predictions = parcel_predictions.dropna(how='all').sort_index(axis=1)


    for col in parcel_predictions.columns:
        print col, (parcel_predictions[col]*far_predictions.parcelsize).sum()/1000000.0  # feasible square footage by building type, in millions

    ####SELECTING SITES
    np.random.seed(1)
    p_sample_proportion = .5
    parcel_predictions = parcel_predictions.loc[np.random.choice(parcel_predictions.index, int(len(parcel_predictions.index)*p_sample_proportion),replace=False)]
    parcel_predictions.index.name = 'parcel_id'


    parcel_predictions.to_csv(os.path.join(misc.data_dir(),'parcel_predictions.csv'),index_label='parcel_id',float_format="%.2f")
    # far_predictions.to_csv(os.path.join(misc.data_dir(),'far_predictions.csv'),index_label='parcel_id',float_format="%.2f")

    #####CALL TO THE DEVELOPER
    newbuildings, price_shifters  = new_developer.run(dset,hh_zone_diff,emp_zone_diff,parcel_predictions,year=sim_year,
                                 min_building_sqft=developer_configuration['min_building_sqft'],
                                 min_lot_sqft=developer_configuration['min_lot_sqft'],
                                 max_lot_sqft=max_parcel_sqft,zone_args=zone_args, tot_sqft=dset.zones[['residential_sqft_zone','non_residential_sqft_zone']])
                                 
    #####APPLY PRICE SHIFTS (PSEUDO-EQUILIBRATION) [MAKE THIS OPTIONAL]
    print 'Applying price shifts'
    pshift = pd.DataFrame([(btype, zone, amt) for (btype, zone), amt in price_shifters.items()],
                          columns=['btype','zone','shift_amount'])
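    # price_shifters is assumed to be keyed by (building_type_id, zone_id) with a
    # multiplicative shift as the value, e.g. {(5, 101): 1.02}; pairs with no shift
    # default to 1.0 after the merge/fillna below.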
    buildings['zone_id'] = parcels.loc[buildings.parcel_id, "zone_id"].values
    buildings['bid'] = buildings.index.values
    buildings = pd.merge(buildings,pshift,left_on=['building_type_id','zone_id'],right_on=['btype','zone'],how='left')
    buildings.shift_amount = buildings.shift_amount.fillna(1.0)
    buildings.unit_price_residential = buildings.unit_price_residential*buildings.shift_amount
    #buildings.unit_price_non_residential = buildings.unit_price_non_residential*buildings.shift_amount
    buildings.index = buildings.bid
    
    ##When net residential units is less than 0, need to implement building demolition
    newbuildings = newbuildings[['zone_id','building_type_id',
                                 'building_sqft','residential_units','lot_size']]
    #print newbuildings.building_sqft
    newbuildings = newbuildings.reset_index()

    newbuildings.columns = ['parcel_id','zone_id','building_type_id','bldg_sq_ft','residential_units','land_area']
    newbuildings.parcel_id = newbuildings.parcel_id.astype('int32')
    #newbuildings['county_id']=parcel_predictions.county_id[newbuildings.parcel_id].values  # why is this here?

    #print newbuildings[newbuildings.residential_units == 0].groupby('county_id').bldg_sq_ft.sum()
    newbuildings.residential_units = newbuildings.residential_units.astype('int32')
    newbuildings.land_area = newbuildings.land_area.astype('int32')
    newbuildings.building_type_id = newbuildings.building_type_id.astype('int32')
    newbuildings.parcel_id = newbuildings.parcel_id.astype('int32')
    newbuildings.bldg_sq_ft = np.round(newbuildings.bldg_sq_ft).astype('int32')
    newbuildings['bldg_sq_ft2'] = np.round(newbuildings.bldg_sq_ft).astype('int32')  # column assignment; attribute-style assignment would not create the column

    newbuildings['non_residential_sqft'] = 0
    newbuildings.loc[newbuildings.residential_units == 0, "non_residential_sqft"] = newbuildings.bldg_sq_ft
    newbuildings['improvement_value'] = (newbuildings.non_residential_sqft*100 + newbuildings.residential_units*100000).astype('int32')
    newbuildings['sqft_per_unit'] = 1400
    newbuildings.loc[newbuildings.residential_units>0, "sqft_per_unit"] = 1000
    newbuildings['stories'] = np.ceil(newbuildings.bldg_sq_ft*1.0/newbuildings.land_area).astype('int32')
    newbuildings['tax_exempt'] = 0
    newbuildings['year_built'] = sim_year
    newbuildings['unit_price_residential'] = 0.0
    newbuildings.loc[newbuildings.residential_units>0, "unit_price_residential"] = buildings[buildings.unit_price_residential>0].unit_price_residential.median()

    newbuildings['unit_price_res_sqft'] = 0.0
    newbuildings.loc[newbuildings.residential_units>0, "unit_price_res_sqft"] = buildings[buildings.unit_price_res_sqft>0].unit_price_res_sqft.median()

    newbuildings['unit_price_non_residential'] = 0.0
    newbuildings.loc[newbuildings.non_residential_sqft>0, "unit_price_non_residential"] = buildings[buildings.unit_price_non_residential>0].unit_price_non_residential.median()

    ##### XG: originally, impose exogenous prices for new buildings. Now impose average county price
    #newbuildings['county_id'] = dset.parcels.county_id[newbuildings.parcel_id].values  # improper join - index incorrect
    newbuildings['county_id'] = parcels.loc[newbuildings.parcel_id, "county_id"].values

    #buildings['county_id'] = dset.parcels.county_id[buildings.parcel_id].values  # improper join - index incorrect
    buildings['county_id'] = parcels.loc[buildings.parcel_id, "county_id"].values
    u=pd.DataFrame(buildings[(buildings.bldg_sq_ft2>0)*(np.in1d(buildings.building_type_id,[2,3,20,24]))].groupby('county_id').unit_price_res_sqft.mean())
    u.columns=['res_price_county']

    newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True)

    u=pd.DataFrame(buildings[(buildings.non_residential_sqft>0)*(np.in1d(buildings.building_type_id,[5,9,17,18,22]))].groupby('county_id').unit_price_non_residential.mean())
    u.columns=['nres_price_county']
    newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True)

    u=pd.DataFrame(buildings.groupby('county_id').unit_price_residential.mean())
    u.columns=['unit_res_price_county']
    newbuildings=pd.merge(newbuildings, u, left_on='county_id', right_index=True)

    newbuildings.loc[(newbuildings.bldg_sq_ft>0)*(np.in1d(newbuildings.building_type_id,[2,3,20,24])), "unit_price_residential"] = newbuildings.unit_res_price_county
    newbuildings.loc[(newbuildings.bldg_sq_ft>0)*(np.in1d(newbuildings.building_type_id,[2,3,20,24])), "unit_price_res_sqft"] = newbuildings.res_price_county
    newbuildings.loc[(newbuildings.non_residential_sqft>0)*(np.in1d(newbuildings.building_type_id,[5,9,17,18,22])), "unit_price_non_residential"] = newbuildings.nres_price_county
    #print newbuildings[(np.in1d(newbuildings.building_type_id,[2,3,20,24]))*(newbuildings['bldg_sq_ft']>0)].groupby('county_id').unit_price_res_sqft.mean()
    #### end XG



    newbuildings['building_sqft_per_job'] = 250.0  #####Need to replace with observed
    newbuildings['non_residential_units'] = (newbuildings.non_residential_sqft/newbuildings.building_sqft_per_job).fillna(0)
    newbuildings['base_year_jobs'] = 0.0
    newbuildings['all_units'] = newbuildings.non_residential_units + newbuildings.residential_units 

    newbuildings.non_residential_sqft = newbuildings.non_residential_sqft.astype('int32')
    newbuildings.tax_exempt = newbuildings.tax_exempt.astype('int32')
    newbuildings.year_built = newbuildings.year_built.astype('int32')
    newbuildings.sqft_per_unit = newbuildings.sqft_per_unit.astype('int32')
    newbuildings = newbuildings.set_index(np.arange(len(newbuildings.index))+np.amax(buildings.index.values)+1)
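    # New building ids continue past the current maximum building id so the
    # concatenated buildings table keeps a unique index.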

    buildings = buildings[['zone_id','building_type_id','improvement_value','land_area','non_residential_sqft','parcel_id','residential_units','sqft_per_unit','stories','tax_exempt','year_built','bldg_sq_ft','bldg_sq_ft2','unit_price_non_residential','unit_price_residential','building_sqft_per_job','non_residential_units','base_year_jobs','all_units', 'unit_price_res_sqft']]
    
    return buildings, newbuildings
Example #19
0
def run(dset, current_year):
    """Refines zone level model results
    """

    b = dset.buildings
    p = dset.parcels
    if p.index.name != 'parcel_id':
        p = p.set_index('parcel_id')

    z = dset.zones

    e = dset.establishments
    hh = dset.households
    zone_refine = pd.read_csv(
        os.path.join(misc.data_dir(), 'zone_demand_refine_no_broomfield.csv'))
    # np.random.shuffle operates in place and returns None; shuffle copies so the
    # frame indexes stay intact
    shuffled_hh_id = hh.index.values.copy()
    np.random.shuffle(shuffled_hh_id)
    shuffled_emp_id = e.index.values.copy()
    np.random.shuffle(shuffled_emp_id)

    def relocate_agents(agents_joined, zone_id, number_of_agents):
        agent_pool = agents_joined[agents_joined.zone_id != zone_id]
        #shuffled_ids = agent_pool.index.values
        #np.random.shuffle(shuffled_ids)
        #agents_to_relocate = shuffled_ids[:number_of_agents]
        #idx_agents_to_relocate = np.in1d(dset.households.index.values,agents_to_relocate)

        random_sample = random.sample(agent_pool.index, int(number_of_agents))  # sample size must be an int

        # new_building_id = b[b.zone_id==zone_id].index.values[0]
        # dset.households.building_id[idx_agents_to_relocate] = new_building_id
        #try:
        new_building_id = b[b.zone_id == zone_id].index.values[0]
        agents_joined.loc[random_sample, "building_id"] = new_building_id
        # except:
        #     print 'No buildings in specified zone.'
        #     if zone_id not in dset.parcels.zone_id.values:
        #         county = z.county.values[z.index.values==zone_id][0]
        #         x = z.zonecentroid_x.values[z.index.values==zone_id][0]
        #         y = z.zonecentroid_y.values[z.index.values==zone_id][0]
        #         if county == 'Denver':
        #             county_id = 8031
        #         elif county == 'Adams':
        #             county_id = 8001
        #         elif county == 'Arapahoe':
        #             county_id = 8005
        #         elif county == 'Boulder':
        #             county_id = 8013
        #         elif county == 'Broomfield':
        #             county_id = 8014
        #         elif county == 'Clear Creek':
        #             county_id = 8019
        #         elif county == 'Douglas':
        #             county_id = 8035
        #         elif county == 'Elbert':
        #             county_id = 8039
        #         elif county == 'Gilpin':
        #             county_id = 8047
        #         elif county == 'Jefferson':
        #             county_id = 8059
        #         elif county == 'Weld':
        #             county_id = 8123
        #         pid = p.index.values.max()+1
        #         newparcel = pd.DataFrame({'county_id':[county_id],'parcel_sqft':[43560],'land_value':[0],'zone_id':[zone_id],
        #                                      'centroid_x':[x],'centroid_y':[y],'x':[x],'y':[y],'dist_bus':[6000],'dist_rail':[6000],'in_ugb':[1],'in_uga':[0],
        #                                      'prop_constrained':[0.0],'acres':[1.0] })
        #         newparcel.index = np.array([pid])
        #         dset.d['parcels'] = pd.concat([p,newparcel])
        #         dset.parcels.index.name = 'parcel_id'
        #     else:
        #         pid = p.index.values[p.zone_id==zone_id][0]
        #     print 'Constructing small structure to place agents'
        #     new_building_id = dset.buildings.index.values.max() + 1
        #     newbuildings = pd.DataFrame({'building_type_id':[20],'improvement_value':[10000],'land_area':[200],'non_residential_sqft':[0],
        #                                  'parcel_id':[pid],'residential_units':[2],'sqft_per_unit':[250],'stories':[0],'tax_exempt':[0],'year_built':[2000],'bldg_sq_ft':[500],
        #                                  'unit_price_non_residential':[0.0],'unit_price_residential':[5000.0], 'building_sqft_per_job':[0.0],
        #                                  'non_residential_units':[0],'base_year_jobs':[0.0],'all_units':[2]})
        #     newbuildings.index = np.array([new_building_id])
        #     dset.d['buildings'] = pd.concat([dset.buildings,newbuildings])
        #     dset.buildings.index.name = 'building_id'
        #     agents_joined.building_id[idx_agents_to_relocate] = new_building_id

    def unplace_agents(agents_joined, zone_id, number_of_agents):
        number_of_agents = int(-number_of_agents)  #flip the sign; int for random.sample
        agent_pool = agents_joined[
            agents_joined.zone_id ==
            zone_id]  ##Notice the equality instead of disequality
        if len(agent_pool) > number_of_agents:
            #shuffled_ids = agent_pool.index.values
            #np.random.shuffle(shuffled_ids)
            #agents_to_relocate = shuffled_ids[:number_of_agents]
            #idx_agents_to_relocate = np.in1d(dset.households.index.values,agents_to_relocate)
            random_sample = random.sample(agent_pool.index, number_of_agents)
            dset.households.loc[random_sample, 'building_id'] = -1  #unplace (corrected chained index error)

    def relocate_estabs(agents_joined, zone_id, number_of_agents):
        agent_pool = agents_joined[(agents_joined.zone_id != zone_id)]
        e_sample = agent_pool.reindex(np.random.permutation(agent_pool.index))
        e_to_move = e_sample[np.cumsum(e_sample['employees'].values) < abs(
            number_of_agents + 10)]
        shuffled_ids = e_to_move.index.values
        #np.random.shuffle(shuffled_ids)
        agents_to_relocate = shuffled_ids
        idx_agents_to_relocate = np.in1d(dset.establishments.index.values,
                                         agents_to_relocate)

        # new_building_id = b[b.zone_id==zone_id].index.values[0]
        # dset.establishments.building_id[idx_agents_to_relocate] = new_building_id
        #try:
        new_building_id = b[b.zone_id == zone_id].index.values[0]
        agents_joined.loc[
            idx_agents_to_relocate,
            "building_id"] = new_building_id  # corrected chain index error
        # except:
        #     print 'No buildings in specified zone.'
        #     if zone_id not in dset.parcels.zone_id.values:
        #         county = z.county.values[z.index.values==zone_id][0]
        #         x = z.zonecentroid_x.values[z.index.values==zone_id][0]
        #         y = z.zonecentroid_y.values[z.index.values==zone_id][0]
        #         if county == 'Denver':
        #             county_id = 8031
        #         elif county == 'Adams':
        #             county_id = 8001
        #         elif county == 'Arapahoe':
        #             county_id = 8005
        #         elif county == 'Boulder':
        #             county_id = 8013
        #         elif county == 'Broomfield':
        #             county_id = 8014
        #         elif county == 'Clear Creek':
        #             county_id = 8019
        #         elif county == 'Douglas':
        #             county_id = 8035
        #         elif county == 'Elbert':
        #             county_id = 8039
        #         elif county == 'Gilpin':
        #             county_id = 8047
        #         elif county == 'Jefferson':
        #             county_id = 8059
        #         elif county == 'Weld':
        #             county_id = 8123
        #         pid = p.index.values.max()+1
        #         newparcel = pd.DataFrame({'county_id':[county_id],'parcel_sqft':[43560],'land_value':[0],'zone_id':[zone_id],
        #                                      'centroid_x':[x],'centroid_y':[y],'x':[x],'y':[y],'dist_bus':[6000],'dist_rail':[6000],'in_ugb':[1],'in_uga':[0],
        #                                      'prop_constrained':[0.0],'acres':[1.0] })
        #         newparcel.index = np.array([pid])
        #         dset.d['parcels'] = pd.concat([p,newparcel])
        #         dset.parcels.index.name = 'parcel_id'
        #     else:
        #         pid = p.index.values[p.zone_id==zone_id][0]
        #     print 'Constructing small structure to place agents'
        #     new_building_id = dset.buildings.index.values.max() + 1
        #     newbuildings = pd.DataFrame({'building_type_id':[4],'improvement_value':[10000],'land_area':[200],'non_residential_sqft':[500],
        #                                  'parcel_id':[pid],'residential_units':[0],'sqft_per_unit':[0],'stories':[0],'tax_exempt':[0],'year_built':[2000],'bldg_sq_ft':[500],
        #                                  'unit_price_non_residential':[2.0],'unit_price_residential':[0.0], 'building_sqft_per_job':[250.0],
        #                                  'non_residential_units':[2],'base_year_jobs':[0.0],'all_units':[2]})
        #     newbuildings.index = np.array([new_building_id])
        #     dset.d['buildings'] = pd.concat([dset.buildings,newbuildings])
        #     dset.buildings.index.name = 'building_id'
        #     agents_joined.loc[idx_agents_to_relocate, "building_id"] = new_building_id  # corrected chain index error

    def unplace_estabs(agents_joined, zone_id, number_of_agents):
        number_of_agents = -number_of_agents  #flip the sign
        agent_pool = agents_joined[
            agents_joined.zone_id ==
            zone_id]  ##Notice the equality instead of disequality
        if agent_pool.employees.sum() > number_of_agents:
            e_sample = agent_pool.reindex(
                np.random.permutation(agent_pool.index))
            e_to_move = e_sample[np.cumsum(e_sample['employees'].values) < abs(
                number_of_agents)]
            shuffled_ids = e_to_move.index.values
            np.random.shuffle(shuffled_ids)
            agents_to_relocate = shuffled_ids
            idx_agents_to_relocate = np.in1d(dset.establishments.index.values,
                                             agents_to_relocate)
            dset.establishments.loc[idx_agents_to_relocate,
                                    'building_id'] = -1  #unplace (corrected chained index error)

    # for zone in zone_refine.zone_id.values:
    #     idx_zone = (zone_refine.zone_id==zone)
    #     hh_shift = zone_refine.annual_hh_shift[idx_zone].values[0]
    #     emp_shift = zone_refine.annual_emp_shift[idx_zone].values[0]
    #     if hh_shift > 0:
    #         relocate_agents(hh,zone,hh_shift)
    #     if emp_shift > 0:
    #         relocate_estabs(e,zone,emp_shift)
    #     if current_year < 2040:
    #         if hh_shift < 0:
    #             unplace_agents(hh,zone,hh_shift)
    #         if emp_shift < 0:
    #             unplace_agents(e,zone,emp_shift)
    def refine(series):
        hh_shift = series.annual_hh_shift
        emp_shift = series.annual_emp_shift
        zone = series.zone_id
        if hh_shift > 0:
            relocate_agents(hh, zone, hh_shift)
        if emp_shift > 0:
            relocate_estabs(e, zone, emp_shift)
        if current_year < 2040:
            if hh_shift < 0:
                unplace_agents(hh, zone, hh_shift)
            if emp_shift < 0:
                unplace_estabs(e, zone, emp_shift)

    zone_refine.apply(refine, axis=1)
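    # The refinement table is assumed to carry one row per zone with zone_id,
    # annual_hh_shift, and annual_emp_shift columns (the fields refine() reads), e.g.:
    # pd.DataFrame({'zone_id': [101], 'annual_hh_shift': [12],
    #               'annual_emp_shift': [-4]}).apply(refine, axis=1)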
Example #20
0
    for btype in btypes:

      # three questions - 1) is type allowed 2) what FAR is allowed 3) is it supported by rents
      tmp = parcels[parcels['type%d'%btype]=='t'][['max_far']] # is type allowed
      far_predictions['type%d_zonedfar'%btype] = tmp['max_far'] # at what far

      # merge zoning with feasibility
      tmp = pd.merge(tmp,far_predictions[[typ+'_feasiblefar']],left_index=True,right_index=True,how='left').set_index(tmp.index)

      # min of zoning and feasibility
      parcel_predictions[btype] = pd.Series(np.minimum(tmp['max_far'],tmp[typ+'_feasiblefar']),index=tmp.index) 
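      # e.g. a parcel zoned for far 2.0 where only 1.2 is profitable -> prediction
      # 1.2; zoned 0.5 with 1.2 feasible -> 0.5 (zoning binds)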
  parcel_predictions = parcel_predictions.dropna(how='all').sort_index(axis=1)

  print "Average rents\n", avgrents.describe()
  print "Feasibility\n", far_predictions.describe()
  print "Restricted to zoning\n", parcel_predictions.describe()
  print "Feasible square footage (in millions)"
  for col in parcel_predictions.columns: 
    print col, (parcel_predictions[col]*far_predictions.parcelsize).sum()/1000000.0
  parcel_predictions.to_csv('parcel_predictions.csv',index_label='parcel_id',float_format="%.2f")
  far_predictions.to_csv('far_predictions.csv',index_label='parcel_id',float_format="%.2f")
  print "Finished developer", time.ctime()

if __name__ == '__main__':  

  print "Running spotproforma"
  dev = spotproforma.Developer()
  print "Done running spotproforma"
  dset = baydataset.BayAreaDataset(os.path.join(misc.data_dir(),'bayarea.h5'))
  run(dset,2010,dev=dev)
Example #21
0
    def run(self, name=None, export_buildings_to_urbancanvas=False, base_year=2010, forecast_year=None, fixed_seed=True, random_seed=1, indicator_configuration=None, core_components_to_run=None, household_transition=None,household_relocation=None,employment_transition=None, elcm_configuration=None, developer_configuration=None, table_swapping=None, travel_model_configuration1=None, travel_model_configuration2=None, travel_model_configuration3=None, travel_model_configuration4=None, travel_model_configuration5=None, travel_model_configuration6=None):
        """Runs an UrbanSim2 scenario 
        """
        logger.log_status('Starting UrbanSim2 run.')
        dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))
        seconds_start = time.time()
        if fixed_seed:
            logger.log_status('Running with fixed random seed.')
            np.random.seed(random_seed)
            
        #Load estimated coefficients
        coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5'))
        dset.coeffs = coeff_store.coeffs.copy()
        coeff_store.close()

        coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs_res.h5'))
        dset.coeffs_res = coeff_store.coeffs_res.copy()
        coeff_store.close()
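        # Equivalent load sketch, for pandas versions where HDFStore supports the
        # context-manager protocol (closes the store even if the read raises):
        # with pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5')) as store:
        #     dset.coeffs = store['coeffs'].copy()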

        #Keep track of unplaced agents by year
        unplaced_hh = []
        unplaced_emp = []
        
        #UrbanCanvas scenario id, replaced by db-retrieved value during export step
        urbancanvas_scenario_id = 0

        #####Residential Buildings#####
        new_refiner.add_res_buildings(dset)

        #####Non-Residential Buildings#####
        new_refiner.add_non_res_buildings(dset)
        
        for sim_year in range(base_year,forecast_year+1):
            print 'Simulating year ' + str(sim_year)
            logger.log_status(sim_year)

            ##Variable Library calculations
            variable_library.calculate_variables(dset)
            
            #Record pre-demand model zone-level household/job totals
            hh_zone1 = dset.fetch('households').groupby('zone_id').size()
            emp_zone1 = dset.fetch('establishments').groupby('zone_id').employees.sum()
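            # hh_zone1/emp_zone1 are the "before" snapshots; the proforma developer
            # step below subtracts them from post-demand-model totals to derive the
            # zone-level demand deltas that drive new construction.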
            
            ############     ELCM SIMULATION
            if core_components_to_run['ELCM']:
                logger.log_status('ELCM simulation.')
                alternatives = dset.buildings[(dset.buildings.non_residential_sqft>0)]
                new_elcm_model.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'establishments',output_names = ("drcog-coeff-elcm-%s.csv","DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)","emp_location_%s","establishment_building_ids"),
                                         agents_groupby= ['sector_id_retail_agg',],transition_config = {'Enabled':True,'control_totals_table':'annual_employment_control_totals','scaling_factor':1.0})

            #################     HLCM SIMULATION
            if core_components_to_run['HLCM']:
                logger.log_status('HLCM simulation.')
                alternatives = dset.buildings[(dset.buildings.residential_units>0)]
                new_hlcm_simulation.simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'households',output_names = ("drcog-coeff-hlcm-%s.csv","DRCOG HOUSEHOLD LOCATION CHOICE MODELS (%s)","hh_location_%s","household_building_ids"),
                                         agents_groupby= ['income_3_tenure',],transition_config = {'Enabled':True,'control_totals_table':'annual_household_control_totals','scaling_factor':1.0},
                                         relocation_config = {'Enabled':True,'relocation_rates_table':'annual_household_relocation_rates','scaling_factor':1.0},)
                                         
            ############     DEMAND-SIDE REFINEMENT
            #refiner.run(dset, sim_year)
            # refiner_fnc = "refiner.run(dset, sim_year)"
            #cProfile.runctx(refiner_fnc, locals={'dset':dset, 'sim_year':sim_year}, globals={'refiner': refiner}, filename='c:/users/jmartinez/documents/refiner_time')

            ############     REPM SIMULATION
            if core_components_to_run['Price']:
                logger.log_status('REPM simulation.')
                #Residential
                census_model_simulation.simulate_residential(dset, 'unit_price_res_sqft', 'school_district_id', 10, sim_year)

                #Non-residential                                    
                regression_model_simulation.simulate(dset, year=sim_year,output_varname='unit_price_non_residential', simulation_table='buildings', output_names = ["drcog-coeff-nrhedonic-%s.csv","DRCOG NRHEDONIC MODEL (%s)","nrprice_%s"],
                                                     agents_groupby = 'building_type_id', segment_ids = [5,8,11,16,17,18,21,23,9,22])
            
            ############     DEVELOPER SIMULATION
            if core_components_to_run['Developer']:
                logger.log_status('Proforma simulation.')
                buildings, newbuildings = proforma_developer_model.run(dset,hh_zone1,emp_zone1,developer_configuration,sim_year)
                dset.d['buildings'] = pd.concat([buildings,newbuildings])
                dset.buildings.index.name = 'building_id'
            
            ############   INDICATORS
            if indicator_configuration['export_indicators']:
                unplaced_hh.append((dset.households.building_id==-1).sum())
                unplaced_emp.append(dset.establishments[dset.establishments.building_id==-1].employees.sum())
                if sim_year in indicator_configuration['years_to_run']:
                    logger.log_status('Exporting indicators')
                    indicators.run(dset, indicator_configuration['indicator_output_directory'], sim_year)
                    logger.log_status('unplaced hh')
                    logger.log_status(unplaced_hh)
                    logger.log_status('unplaced emp')
                    logger.log_status(unplaced_emp)
                    
            ############     TRAVEL MODEL
            export_zonal_file.export_zonal_file_to_tm(dset,sim_year,logger,tm_config=[travel_model_configuration1,travel_model_configuration2,travel_model_configuration3,travel_model_configuration4,travel_model_configuration5,travel_model_configuration6])
                    
            ############     SWAPPER
            if sim_year == table_swapping['year']:
                if table_swapping['swap_skims']:
                    logger.log_status('Swapping skims')
                    td2 = pd.read_csv(table_swapping['new_skim_file'], index_col=['from_zone_id','to_zone_id'])
                    dset.d['travel_data'] = td2
                if table_swapping['swap_dist_rail']:
                    logger.log_status('Swapping parcel distance to rail')
                    p2 = pd.read_csv(table_swapping['new_dist_rail_file'], index_col=['parcel_id'])
                    dset.d['parcels']['dist_rail'] = p2.dist_rail
            
            ############      URBANCANVAS
            if export_buildings_to_urbancanvas:
                logger.log_status('Exporting %s buildings to Urbancanvas database for project %s and year %s.' % (newbuildings.index.size,urbancanvas_scenario_id,sim_year))
                urbancanvas_scenario_id = urbancanvas_export.export_to_urbancanvas(newbuildings, sim_year, urbancanvas_scenario_id)
                
        elapsed = time.time() - seconds_start
        print "TOTAL elapsed time: " + str(elapsed) + " seconds."
Example #22
0
        newbuildings.building_sqft / RESUNITSIZE)
    newbuildings = newbuildings[newbuildings.lot_size < MAXLOTSIZE]
    newbuildings = newbuildings[newbuildings.lot_size > MINLOTSIZE]

    return newbuildings, price_shifters_d

    #price_shifters.to_csv('c:/users/jmartinez/documents/test_results.csv')


sqft = pd.DataFrame()
if __name__ == '__main__':

    import dataset
    import cProfile

    dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(), 'drcog.h5'))

    #add variables for test sim
    emp_zone_diff = pd.read_csv(
        'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/emp_zone_diff.csv',
        index_col=0)
    hh_zone_diff = pd.read_csv(
        'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/hh_zone_diff.csv',
        index_col=0)
    parcel_predictions = pd.read_csv(
        'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/parcel_predictions2.csv',
        index_col=0)
    zone_args = pd.read_csv(
        'C:/Users/jmartinez/Documents/Projects/UrbanSim/Developer/zone_args.csv',
        index_col=0)
    tot_sqft = pd.read_csv(
def run(dset, hh_zone1, emp_zone1, developer_configuration, sim_year):
    # Record post-demand-model change in zone-level household/job totals
    hh = dset.fetch("households")
    e = dset.fetch("establishments")
    buildings = dset.fetch("buildings")
    parcels = dset.parcels
    if parcels.index.name != "parcel_id":
        parcels = parcels.set_index(parcels["parcel_id"])
    buildings["zone_id"] = parcels.zone_id[buildings.parcel_id].values

    # e['zone_id'] = buildings.zone_id[e.building_id].values
    hh["zone_id"] = buildings.zone_id[hh.building_id].values
    hh_zone2 = hh.groupby("zone_id").size()
    emp_zone2 = e.groupby("zone_id").employees.sum()
    zdiff = pd.DataFrame(index=dset.zones.index)  #######
    zdiff["hh_zone1"] = hh_zone1
    zdiff["hh_zone2"] = hh_zone2
    zdiff["emp_zone1"] = emp_zone1
    zdiff["emp_zone2"] = emp_zone2
    zdiff = zdiff.fillna(0)
    zdiff.hh_zone2 = zdiff.hh_zone2 + 5
    zdiff.emp_zone2 = zdiff.emp_zone2 + 5
    hh_zone_diff = zdiff.hh_zone2 - zdiff.hh_zone1
    emp_zone_diff = zdiff.emp_zone2 - zdiff.emp_zone1

    #####Get the user inputted zone args
    if developer_configuration["zonal_levers"]:
        zone_args = pd.read_csv(os.path.join(misc.data_dir(), "devmodal_zone_args.csv")).set_index("zone_id")
        ##Getting county_id into zone_args.  Eventually, lets move the dset.zones operations to the varlib
        dset.zones["county_id"] = 0
        dset.zones.loc[dset.zones.county == "Adams", "county_id"] = 8001  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Arapahoe", "county_id"] = 8005  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Boulder", "county_id"] = 8013  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Broomfield", "county_id"] = 8014  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Clear Creek", "county_id"] = 8019  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Denver", "county_id"] = 8031  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Douglas", "county_id"] = 8035  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Elbert", "county_id"] = 8039  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Gilpin", "county_id"] = 8047  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Jefferson", "county_id"] = 8059  # corrected chained index error
        dset.zones.loc[dset.zones.county == "Weld", "county_id"] = 8123  # corrected chained index error
        zone_args["cid"] = dset.zones.county_id
        pd.set_option("display.max_rows", 1000)

        ##Loading/applying county calib factors to scale the zone args
        county_args = pd.read_csv(os.path.join(misc.data_dir(), "county_calib.csv")).set_index("county_id")
        zone_args["county_id"] = zone_args["cid"]
        zone_args = pd.merge(zone_args, county_args, left_on="county_id", right_index=True)

        zone_args.res_price_factor = zone_args.res_price_factor * zone_args.cres_price_factor
        zone_args.nonres_price_factor = zone_args.nonres_price_factor * zone_args.cnonres_price_factor
        zone_args.cost_factor = zone_args.cost_factor * zone_args.ccost_factor
        emp_zone_diff = emp_zone_diff * zone_args.cemp_demand_factor
        hh_zone_diff = hh_zone_diff * zone_args.chh_demand_factor
    else:
        zone_args = None

    # ##########################################
    # #### Getting possible rents by use here ##
    # ##########################################
    buildings = buildings[
        [
            "building_type_id",
            "improvement_value",
            "land_area",
            "non_residential_sqft",
            "parcel_id",
            "residential_units",
            "sqft_per_unit",
            "stories",
            "tax_exempt",
            "year_built",
            "bldg_sq_ft",
            "unit_price_non_residential",
            "unit_price_residential",
            "building_sqft_per_job",
            "non_residential_units",
            "base_year_jobs",
            "all_units",
            "unit_price_res_sqft",
        ]
    ]
    buildings.loc[:, "zone_id"] = parcels.zone_id[buildings.parcel_id].values  #  corrected chain index error

    res_buildings = buildings[buildings.unit_price_residential > 0]
    nonres_buildings = buildings[buildings.unit_price_non_residential > 0]
    nonres_buildings_office = nonres_buildings[nonres_buildings.building_type_id == 5]
    nonres_buildings_retail = nonres_buildings[np.in1d(nonres_buildings.building_type_id, [17, 18])]
    nonres_buildings_industrial = nonres_buildings[np.in1d(nonres_buildings.building_type_id, [9, 22])]

    # Prices are now in price/sqft
    #### XG: define residential price only on types 2, 3, 20, 24 and non-residential on 5, 9, 17, 18, 22
    # NB: the code below assumes zonal levers are enabled; zone_args is None otherwise (see the else branch above)
    zone_args["zone_id"] = zone_args.index
    res_buildings.loc[:, "resprice_sqft"] = res_buildings[
        (res_buildings.bldg_sq_ft > 0) * (np.in1d(res_buildings.building_type_id, [2, 3, 20, 24]))
    ].unit_price_res_sqft  # corrected chain index error
    zonal_resprice_sqft = pd.DataFrame(
        res_buildings[(res_buildings.bldg_sq_ft > 0) * (np.in1d(res_buildings.building_type_id, [2, 3, 20, 24]))]
        .groupby("zone_id")
        .resprice_sqft.mean()
    )
    zonal_resprice_sqft.columns = ["resrent"]
    zone_args = pd.merge(zone_args, zonal_resprice_sqft, left_on="zone_id", right_index=True, how="outer")
    zonal_nonresprice_office = pd.DataFrame(
        nonres_buildings_office[nonres_buildings_office.non_residential_sqft > 0]
        .groupby("zone_id")
        .unit_price_non_residential.mean()
    )
    zonal_nonresprice_office.columns = ["nonresrent_office"]
    zone_args = pd.merge(zone_args, zonal_nonresprice_office, left_on="zone_id", right_index=True, how="outer")
    zonal_nonresprice_retail = pd.DataFrame(
        nonres_buildings_retail[nonres_buildings_retail.non_residential_sqft > 0]
        .groupby("zone_id")
        .unit_price_non_residential.mean()
    )
    zonal_nonresprice_retail.columns = ["nonresrent_retail"]
    zone_args = pd.merge(zone_args, zonal_nonresprice_retail, left_on="zone_id", right_index=True, how="outer")
    zonal_nonresprice_industrial = pd.DataFrame(
        nonres_buildings_industrial[nonres_buildings_industrial.non_residential_sqft > 0]
        .groupby("zone_id")
        .unit_price_non_residential.mean()
    )
    zonal_nonresprice_industrial.columns = ["nonresrent_industrial"]
    zone_args = pd.merge(zone_args, zonal_nonresprice_industrial, left_on="zone_id", right_index=True, how="outer")
    zone_args["resrent"] = zone_args["resrent"] * zone_args.res_price_factor
    zone_args["nonresrent_office"] = zone_args["nonresrent_office"] * zone_args.nonres_price_factor
    zone_args["nonresrent_retail"] = zone_args["nonresrent_retail"] * zone_args.nonres_price_factor
    zone_args["nonresrent_industrial"] = zone_args["nonresrent_industrial"] * zone_args.nonres_price_factor
    zonal_avg_rents = zone_args[
        [
            "resrent",
            "nonresrent_office",
            "nonresrent_retail",
            "nonresrent_industrial",
            "cost_factor",
            "allowable_density_factor",
        ]
    ]
    zonal_avg_rents.loc[:, "zone_id"] = zonal_avg_rents.index  #  corrected chain index error
    zonal_avg_rents.loc[:, "county_id"] = dset.zones.county_id[
        zonal_avg_rents["zone_id"]
    ].values  # corrected chain index error
    pd.set_option("display.max_rows", len(dset.zones.index))
    del zonal_avg_rents["county_id"]
    del zonal_avg_rents["zone_id"]

    """
    res_buildings['resprice_sqft'] = res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].unit_price_res_sqft
    zonal_resprice_sqft = pd.DataFrame(res_buildings[(res_buildings.bldg_sq_ft>0)*(np.in1d(res_buildings.building_type_id,[2,3,20,24]))].groupby('zone_id').resprice_sqft.mean())
    zonal_nonresprice_office = pd.DataFrame(nonres_buildings_office[nonres_buildings_office.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_avg_rents=pd.join(zonal_resprice_sqft, zonal_nonresprice_office, how='outer')
    print  zonal_avg_rents
    sys.exit('beurk')
    zonal_nonresprice_retail = pd.DataFrame(nonres_buildings_retail[ nonres_buildings_retail.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean())
    zonal_avg_rents=pd.join( zonal_nonresprice_retail, zonal_avg_rents, how='outer')
    zonal_nonresprice_industrial = nonres_buildings_industrial[ nonres_buildings_industrial.non_residential_sqft>0].groupby('zone_id').unit_price_non_residential.mean()
    zonal_resrent = zonal_resprice_sqft
    zonal_nonresrent_office = zonal_nonresprice_office
    zonal_nonresrent_retail = zonal_nonresprice_retail
    zonal_nonresrent_industrial = zonal_nonresprice_industrial

    if zone_args is not None:  #####Make sure no nulls in the prices either...
        zonal_resrent = zonal_resrent * zone_args.res_price_factor
        print zonal_resrent
        zonal_nonresrent_office = zonal_nonresprice_office * zone_args.nonres_price_factor
        zonal_nonresrent_retail = zonal_nonresprice_retail * zone_args.nonres_price_factor
        zonal_nonresrent_industrial = zonal_nonresprice_industrial * zone_args.nonres_price_factor
        zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial,'cost_factor':zone_args.cost_factor,'allowable_density_factor':zone_args.allowable_density_factor}, index=zonal_resrent.index)
    else:
        zonal_avg_rents = pd.DataFrame({'resrent':zonal_resrent,'nonresrent_office':zonal_nonresrent_office,'nonresrent_retail':zonal_nonresrent_retail,'nonresrent_industrial':zonal_nonresrent_industrial})
    zonal_avg_rents['zone_id']=zonal_avg_rents.index
    zonal_avg_rents['county_id']=dset.zones.county_id[zonal_avg_rents['zone_id']].values
    pd.set_option('display.max_rows', len(dset.zones.index))
    print zonal_avg_rents[ zonal_avg_rents['county_id']==8123].zone_id
    del  zonal_avg_rents['county_id']
    del zonal_avg_rents['zone_id']
    """
    avgrents = pd.merge(parcels, zonal_avg_rents, left_on="zone_id", right_index=True, how="left")
    avgrents["residential"] = avgrents.resrent
    avgrents["office"] = avgrents.nonresrent_office
    avgrents["retail"] = avgrents.nonresrent_retail
    avgrents["industrial"] = avgrents.nonresrent_industrial

    if zone_args is not None:
        avgrents = avgrents[
            ["residential", "office", "retail", "industrial", "cost_factor", "allowable_density_factor", "county_id"]
        ]
    else:
        avgrents = avgrents[["residential", "office", "retail", "industrial"]]
    avgrents = avgrents.fillna(0.1)

    # avgrents.residential[np.isinf(avgrents.residential)] = .2
    avgrents.loc[avgrents.residential < 0.2, "residential"] = 0.2  # corrected chain index error
    avgrents.loc[avgrents.office < 1, "office"] = 1  # corrected chain index error
    avgrents.loc[avgrents.retail < 1, "retail"] = 1  # corrected chain index error
    avgrents.loc[avgrents.industrial < 1, "industrial"] = 1  # corrected chain index error
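    # The floors above (0.2 for residential, 1 for the non-residential uses) are guard
    # rails, presumably so that zones with no observed sales do not feed zero or
    # near-zero rents into the proforma lookup below.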

    ####################GET PARCEL LEVEL ATTRIBUTES
    #### XG: retain old square footage as it is used to compute average
    buildings.loc[:, "bldg_sq_ft2"] = buildings["bldg_sq_ft"]  # corrected chain index error
    buildings.loc[:, "bldg_sq_ft"] = (
        buildings.non_residential_sqft + buildings.residential_units * buildings.sqft_per_unit
    )  # corrected chain index error
    # buildings['impval'] = buildings.non_residential_sqft*buildings.unit_price_non_residential + buildings.residential_units*buildings.unit_price_residential
    buildings.loc[:, "impval"] = 0  # corrected chain index error
    buildings.loc[buildings.residential_units * buildings.unit_price_residential > 0, "impval"] = (
        buildings.residential_units * buildings.unit_price_residential
    )
    buildings.loc[buildings.non_residential_sqft * buildings.unit_price_non_residential > 0, "impval"] = (
        buildings["impval"] + buildings.non_residential_sqft * buildings.unit_price_non_residential
    )
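    # impval approximates current improvement value from unit counts and prices, e.g.
    # 10 units at $200,000 plus 5,000 nonres sqft at $20/sqft gives
    # impval = 10 * 200000 + 5000 * 20 = 2,100,000; summed by parcel below, it proxies
    # the property acquisition cost a developer would face.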
    far_predictions = pd.DataFrame(index=parcels.index)
    # far_predictions['current_yearly_rent_buildings'] = buildings.groupby('parcel_id').impval.sum()/17.9
    far_predictions["current_yearly_rent_buildings"] = buildings.groupby("parcel_id").impval.sum()
    far_predictions["current_yearly_rent_buildings"] = far_predictions.current_yearly_rent_buildings.fillna(0)
    far_predictions.current_yearly_rent_buildings = (
        far_predictions.current_yearly_rent_buildings * developer_configuration["land_property_acquisition_cost_factor"]
    )
    far_predictions["county_id"] = parcels.county_id[far_predictions.index].values
    print far_predictions[far_predictions["current_yearly_rent_buildings"] > 0].groupby(
        "county_id"
    ).current_yearly_rent_buildings.mean()

    if zone_args is not None:
        # far_predictions.current_yearly_rent_buildings = avgrents.cost_factor*far_predictions.current_yearly_rent_buildings ##Cost scaling happens here
        pass  # cost scaling via avgrents.cost_factor is currently disabled, so this branch is a no-op
    far_predictions["parcelsize"] = parcels.parcel_sqft

    ###PROFORMA SURFACE CALCULATIONS AND LOOKUPS (TO ARRIVE AT UNCONSTRAINED FARS BY USE)
    # do the lookup in the developer model - this is where the profitability is computed

    dev = spotproforma.Developer(profit_factor=developer_configuration["profit_factor"])
    for form in spotproforma.forms.keys():
        far_predictions[form + "_feasiblefar"], far_predictions[form + "_profit"] = dev.lookup(
            form,
            avgrents[spotproforma.uses].as_matrix(),
            far_predictions.current_yearly_rent_buildings,
            far_predictions.parcelsize,
        )
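    # A conceptual sketch of the lookup (not spotproforma's actual internals): for each
    # form, scan a grid of FARs and keep the most profitable one supported by rents,
    # roughly
    #   profit(far) = revenue(avgrents, far, parcelsize) - construction_cost(far, parcelsize) - acquisition_cost
    #   feasiblefar = the far maximizing profit where profit > 0, else 0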

    # we now have a far prediction per parcel by allowable building type!

    #################DEVCONSTRAINTS:  Obtain zoning and other development constraints #####
    zoning = dset.fetch("zoning")
    fars = dset.fetch("fars")
    max_parcel_sqft = 200000
    max_far_field = developer_configuration["max_allowable_far_field_name"]
    if max_far_field not in parcels.columns:
        parcels = pd.merge(parcels, fars, left_on="far_id", right_index=True)
        if developer_configuration["enforce_environmental_constraints"]:
            parcels[max_far_field] = parcels[max_far_field] * (
                1 - parcels.prop_constrained
            )  # Adjust allowable FAR to account for undevelopable proportion of parcel land
        if developer_configuration["enforce_ugb"]:
            parcels[max_far_field][parcels.in_ugb == 0] = (
                parcels[max_far_field][parcels.in_ugb == 0] * developer_configuration["outside_ugb_allowable_density"]
            )
        if developer_configuration["uga_policies"]:
            parcels[max_far_field][parcels.in_uga == 1] = (
                parcels[max_far_field][parcels.in_ugb == 1] * developer_configuration["inside_uga_allowable_density"]
            )
        parcels.loc[
            parcels.parcel_sqft < developer_configuration["min_lot_sqft"], max_far_field
        ] = 0  # fixed chained index error; note max_far_field is a variable, not a literal column name
        parcels.loc[parcels.parcel_sqft > max_parcel_sqft, max_far_field] = 0  # fixed chained indexing error
    if "type1" not in parcels.columns:
        parcels = pd.merge(parcels, zoning, left_on="zoning_id", right_index=True)
    ##Scale allowable FARs here if needed
    if zone_args is not None:
        parcels[max_far_field] = parcels[max_far_field] * avgrents.allowable_density_factor

    ####### BUILDING TYPE DICTIONARY #####
    type_d = {"residential": [2, 3, 20, 24], "industrial": [9, 22], "retail": [17, 18], "office": [5]}
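    # type_d maps the four aggregate proforma forms to detailed building_type_id codes,
    # e.g. building_type_id 17 or 18 both count as "retail"; the loop below walks this
    # mapping to reconcile zoning (detailed types) with feasibility (aggregate forms).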

    ###MERGE ALLOWABLE DENSITY BY USE WITH FEASIBLE DENSITY BY USE (TAKE MINIMUM) TO ARRIVE AT A PARCEL PREDICTION
    # we have zoning by like 16+ building types and rents/far predictions by 4 more aggregate building types
    # so we have to convert one into the other
    parcel_predictions = pd.DataFrame(index=parcels.index)

    parcel_predictions["county_id"] = parcels.county_id

    for typ, btypes in type_d.iteritems():
        for btype in btypes:
            # three questions - 1) is type allowed 2) what FAR is allowed 3) is it supported by rents
            if developer_configuration["enforce_allowable_use_constraints"]:
                # is type allowed, and at what zoned FAR
                tmp = parcels[parcels["type%d" % btype] == 1][[max_far_field]]
                far_predictions["type%d_zonedfar" % btype] = tmp[max_far_field]
            else:
                tmp = parcels[[max_far_field]]  # tmp must exist in this branch too (it was previously undefined here)
                far_predictions["type%d_zonedfar" % btype] = parcels[max_far_field]
            # merge zoning with feasibility
            tmp.index.name = "parcel_id"
            tmp = pd.merge(
                tmp, far_predictions[[typ + "_feasiblefar"]], left_index=True, right_index=True, how="left"
            ).set_index(tmp.index)
            # min of zoning and feasibility
            parcel_predictions[btype] = pd.Series(
                np.minimum(tmp[max_far_field], tmp[typ + "_feasiblefar"]), index=tmp.index
            )
            # avgrents2=avgrents.ix[parcels['type%d'%btype]==1]
            # profit=dev.profit(typ,avgrent2s[spotproforma.uses].as_matrix(),far_predictions.current_yearly_rent_buildings,parcel_prediction[btype])

            # print profit
            # parcel_predictions[btype+'_profit']=pd.Series(profit,index=tmp.index)
    parcel_predictions = parcel_predictions.dropna(how="all").sort_index(axis=1)
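    # Example of the min-merge in the loop above: a parcel zoned to allow type 5
    # (office) at FAR 2.0 whose proforma only supports office at FAR 1.2 gets a
    # prediction of min(2.0, 1.2) = 1.2; a parcel where no use is allowed stays NaN
    # across all types and is dropped by the dropna above.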

    for col in parcel_predictions.columns:
        print col, (
            parcel_predictions[col] * far_predictions.parcelsize
        ).sum() / 1000000.0  # report total predicted buildable sqft per type, in millions

    ####SELECTING SITES
    np.random.seed(1)
    p_sample_proportion = 0.5
    parcel_predictions = parcel_predictions.ix[
        np.random.choice(
            parcel_predictions.index, int(len(parcel_predictions.index) * p_sample_proportion), replace=False
        )
    ]
    parcel_predictions.index.name = "parcel_id"

    parcel_predictions.to_csv(
        os.path.join(misc.data_dir(), "parcel_predictions.csv"), index_label="parcel_id", float_format="%.2f"
    )  # to_csv takes index_label, not index_col
    # far_predictions.to_csv(os.path.join(misc.data_dir(),'far_predictions.csv'),index_col='parcel_id',float_format="%.2f")

    #####CALL TO THE DEVELOPER
    newbuildings, price_shifters = new_developer.run(
        dset,
        hh_zone_diff,
        emp_zone_diff,
        parcel_predictions,
        year=sim_year,
        min_building_sqft=developer_configuration["min_building_sqft"],
        min_lot_sqft=developer_configuration["min_lot_sqft"],
        max_lot_sqft=max_parcel_sqft,
        zone_args=zone_args,
        tot_sqft=dset.zones[["residential_sqft_zone", "non_residential_sqft_zone"]],
    )

    #####APPLY PRICE SHIFTS (PSEUDO-EQUILIBRATION) [MAKE THIS OPTIONAL]
    print "Applying price shifts"
    pshift_btypes = []
    pshift_zone = []
    pshift_shift = []
    for (btype, zone), shift in price_shifters.items():
        pshift_btypes.append(btype)
        pshift_zone.append(zone)
        pshift_shift.append(shift)
    pshift = pd.DataFrame({"btype": pshift_btypes, "zone": pshift_zone, "shift_amount": pshift_shift})
    buildings["zone_id"] = parcels.loc[buildings.parcel_id, "zone_id"].values
    buildings["bid"] = buildings.index.values
    buildings = pd.merge(
        buildings, pshift, left_on=["building_type_id", "zone_id"], right_on=["btype", "zone"], how="left"
    )
    buildings.shift_amount = buildings.shift_amount.fillna(1.0)
    buildings.unit_price_residential = buildings.unit_price_residential * buildings.shift_amount
    # buildings.unit_price_non_residential = buildings.unit_price_non_residential*buildings.shift_amount
    buildings.index = buildings.bid
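    # Example of the price-shift join above: price_shifters is keyed by
    # (building_type_id, zone_id), so an entry like {(2, 101): 1.04} becomes a pshift
    # row that raises unit_price_residential by 4% for type-2 buildings in zone 101;
    # unmatched buildings keep shift_amount = 1.0 (no change).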

    ##When net residential units is less than 0, need to implement building demolition
    newbuildings = newbuildings[["zone_id", "building_type_id", "building_sqft", "residential_units", "lot_size"]]
    # print newbuildings.building_sqft
    newbuildings = newbuildings.reset_index()

    newbuildings.columns = ["parcel_id", "zone_id", "building_type_id", "bldg_sq_ft", "residential_units", "land_area"]
    newbuildings.parcel_id = newbuildings.parcel_id.astype("int32")
    # newbuildings['county_id']=parcel_predictions.county_id[newbuildings.parcel_id].values  # why is this here?

    # print newbuildings[newbuildings.residential_units == 0].groupby('county_id').bldg_sq_ft.sum()
    newbuildings.residential_units = newbuildings.residential_units.astype("int32")
    newbuildings.land_area = newbuildings.land_area.astype("int32")
    newbuildings.building_type_id = newbuildings.building_type_id.astype("int32")
    newbuildings.parcel_id = newbuildings.parcel_id.astype("int32")
    newbuildings.bldg_sq_ft = np.round(newbuildings.bldg_sq_ft).astype("int32")
    newbuildings["bldg_sq_ft2"] = np.round(newbuildings.bldg_sq_ft).astype("int32")  # bracket assignment; attribute-style assignment would not create the new column

    newbuildings["non_residential_sqft"] = 0
    newbuildings.loc[newbuildings.residential_units == 0, "non_residential_sqft"] = newbuildings.bldg_sq_ft
    newbuildings["improvement_value"] = (
        newbuildings.non_residential_sqft * 100 + newbuildings.residential_units * 100000
    ).astype("int32")
    newbuildings["sqft_per_unit"] = 1400
    newbuildings.loc[newbuildings.residential_units > 0, "sqft_per_unit"] = 1000
    newbuildings["stories"] = np.ceil(newbuildings.bldg_sq_ft * 1.0 / newbuildings.land_area).astype("int32")
    newbuildings["tax_exempt"] = 0
    newbuildings["year_built"] = sim_year
    newbuildings["unit_price_residential"] = 0.0
    newbuildings.loc[newbuildings.residential_units > 0, "unit_price_residential"] = buildings[
        buildings.unit_price_residential > 0
    ].unit_price_residential.median()

    newbuildings["unit_price_res_sqft"] = 0.0
    newbuildings.loc[newbuildings.residential_units > 0, "unit_price_res_sqft"] = buildings[
        buildings.unit_price_res_sqft > 0
    ].unit_price_res_sqft.median()

    newbuildings["unit_price_non_residential"] = 0.0
    newbuildings.loc[newbuildings.non_residential_sqft > 0, "unit_price_non_residential"] = buildings[
        buildings.unit_price_non_residential > 0
    ].unit_price_non_residential.median()

    ##### XG: originally, impose exogenous prices for new buildings. Now impose average county price
    # newbuildings['county_id'] = dset.parcels.county_id[newbuildings.parcel_id].values  # improper join - index incorrect
    newbuildings["county_id"] = parcels.loc[newbuildings.parcel_id, "county_id"].values

    # buildings['county_id'] = dset.parcels.county_id[buildings.parcel_id].values  # improper join - index incorrect
    buildings["county_id"] = parcels.loc[buildings.parcel_id, "county_id"].values
    u = pd.DataFrame(
        buildings[(buildings.bldg_sq_ft2 > 0) * (np.in1d(buildings.building_type_id, [2, 3, 20, 24]))]
        .groupby("county_id")
        .unit_price_res_sqft.mean()
    )
    u.columns = ["res_price_county"]

    newbuildings = pd.merge(newbuildings, u, left_on="county_id", right_index=True)

    u = pd.DataFrame(
        buildings[(buildings.non_residential_sqft > 0) * (np.in1d(buildings.building_type_id, [5, 9, 17, 18, 22]))]
        .groupby("county_id")
        .unit_price_non_residential.mean()
    )
    u.columns = ["nres_price_county"]
    newbuildings = pd.merge(newbuildings, u, left_on="county_id", right_index=True)

    u = pd.DataFrame(buildings.groupby("county_id").unit_price_residential.mean())
    u.columns = ["unit_res_price_county"]
    newbuildings = pd.merge(newbuildings, u, left_on="county_id", right_index=True)

    newbuildings.loc[
        (newbuildings.bldg_sq_ft > 0) * (np.in1d(newbuildings.building_type_id, [2, 3, 20, 24])),
        "unit_price_residential",
    ] = newbuildings.unit_res_price_county
    newbuildings.loc[
        (newbuildings.bldg_sq_ft > 0) * (np.in1d(newbuildings.building_type_id, [2, 3, 20, 24])), "unit_price_res_sqft"
    ] = newbuildings.res_price_county
    newbuildings.loc[
        (newbuildings.non_residential_sqft > 0) * (np.in1d(newbuildings.building_type_id, [5, 9, 17, 18, 22])),
        "unit_price_non_residential",
    ] = newbuildings.nres_price_county
    # print newbuildings[(np.in1d(newbuildings.building_type_id,[2,3,20,24]))*(newbuildings['bldg_sq_ft']>0)].groupby('county_id').unit_price_res_sqft.mean()
    #### end XG

    newbuildings["building_sqft_per_job"] = 250.0  #####Need to replace with observed
    newbuildings["non_residential_units"] = (
        newbuildings.non_residential_sqft / newbuildings.building_sqft_per_job
    ).fillna(0)
    newbuildings["base_year_jobs"] = 0.0
    newbuildings["all_units"] = newbuildings.non_residential_units + newbuildings.residential_units

    newbuildings.non_residential_sqft = newbuildings.non_residential_sqft.astype("int32")
    newbuildings.tax_exempt = newbuildings.tax_exempt.astype("int32")
    newbuildings.year_built = newbuildings.year_built.astype("int32")
    newbuildings.sqft_per_unit = newbuildings.sqft_per_unit.astype("int32")
    newbuildings = newbuildings.set_index(np.arange(len(newbuildings.index)) + np.amax(buildings.index.values) + 1)
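    # New buildings get ids continuing after the current maximum, e.g. if the largest
    # existing building_id is 50000, three new buildings are indexed 50001-50003,
    # keeping ids unique when the tables are later combined.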

    buildings = buildings[
        [
            "zone_id",
            "building_type_id",
            "improvement_value",
            "land_area",
            "non_residential_sqft",
            "parcel_id",
            "residential_units",
            "sqft_per_unit",
            "stories",
            "tax_exempt",
            "year_built",
            "bldg_sq_ft",
            "bldg_sq_ft2",
            "unit_price_non_residential",
            "unit_price_residential",
            "building_sqft_per_job",
            "non_residential_units",
            "base_year_jobs",
            "all_units",
            "unit_price_res_sqft",
        ]
    ]

    return buildings, newbuildings
Example #24
0
def simulate(dset,
             year,
             depvar='building_id',
             alternatives=None,
             simulation_table=None,
             output_names=None,
             agents_groupby=[
                 'income_3_tenure',
             ],
             transition_config=None,
             relocation_config=None):

    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (
            ct["total_number_of_households"] *
            transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(
            ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
            hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added, 'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']

    #calculate mortgage payment values

    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05 / 12
    n = 360
    buildings['est_mortgage_payment'] = buildings.unit_price_residential * (
        (r * (1 + r)**n) / ((1 + r)**n - 1))
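    # Standard mortgage amortization factor at 5% APR over n = 360 monthly payments:
    #   r = 0.05/12, factor = r*(1+r)**n / ((1+r)**n - 1) ~= 0.00537
    # so a unit priced at $200,000 implies an estimated payment of about $1,074/month.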

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[
            relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[
            rate_field] * .01 * relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers, rate_table, rate_field)
        choosers.loc[movers, depvar] = -1  # .loc avoids chained .ix assignment

    movers_all = choosers[choosers[depvar] == -1]
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),
                                                   'county_growth_share.csv'),
                                      index_col=0)
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties,
                                       movers_all.shape[0],
                                       replace=True,
                                       p=current_growth_shares)
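    # Movers are assigned counties in proportion to that year's growth shares: with
    # shares (0.5, 0.3, 0.2) across three counties, np.random.choice draws each
    # mover's county from that distribution, so about half land in the first county.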

    movers_all['county_id'] = movers_counties

    income_segment = movers_all.groupby('income_grp')[
        'upper_income_grp_val', 'lower_income_grp_val'].agg([np.mean, np.size])
    # get county growth control data and merge with income_segements

    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():

        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])

    income_segment = income_segment.set_index(['county', income_segment.index])

    print "Total new agents and movers = %d" % len(movers_all.index)

    for seg in income_segment.iterrows():

        movers = movers_all[(movers_all['income'] <= seg[1][0])
                            & (movers_all['income'] >= seg[1][2])]
        print 'County: %s. Placing %d households in the income range (%d, %d)' % (
            seg[0][0], seg[1][1], seg[1][2], seg[1][0])

        empty_units = buildings.residential_units.sub(choosers[
            choosers['building_id'] != -1].groupby('building_id').size(),
                                                      fill_value=0)
        empty_units = empty_units[empty_units > 0].order(ascending=False)
        print 'number of empty units is %d' % empty_units.sum()
        alternatives = buildings.ix[np.repeat(
            empty_units.index.values, empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(seg[0][0])]

        if ((seg[1][2] / 12) <= 0):
            alts = alternatives[
                alternatives['unit_price_residential'] < 186281]
        elif ((seg[1][2] / 12) >= 55000):
            alts = alternatives[
                alternatives['unit_price_residential'] > 1583400]
        else:
            alts = alternatives[alternatives['est_mortgage_payment'] /
                                (seg[1][2] / 12) <= 0.33]
        if (alts.shape[0] == 0):
            homeless = pd.concat([choosers, homeless])
            print 'Could not place %d households due to income restrictions' % seg[
                1][1]
            continue

        pdf = pd.DataFrame(index=alts.index)

        segments = movers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            segment = segment.head(1)
            name = str(name)
            tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv % name, output_title % name, coeff_name % name
            ind_vars = dset.coeffs[(tmp_coeffname, 'fnames')][np.invert(
                dset.coeffs[(tmp_coeffname,
                             'fnames')].isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            numchoosers = segment.shape[0]
            numalts = alts.shape[0]
            sample = np.tile(alts.index.values, numchoosers)
            alts_sample = alts.copy()  # copy so adding join_index below does not mutate alts
            alts_sample['join_index'] = np.repeat(segment.index.values,
                                                  SAMPLE_SIZE)
            alts_sample = pd.merge(alts_sample,
                                   segment,
                                   left_on='join_index',
                                   right_index=True,
                                   suffixes=('', '_r'))
            chosen = np.zeros((numchoosers, SAMPLE_SIZE))
            chosen[:, 0] = 1
            sample, alternative_sample, est_params = sample, alts_sample, (
                'mnl', chosen)
            ##Interaction variables
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars
                                if '_x_' in var]
            for ivar in interaction_vars:
                if ivar[1][0].endswith('gt'):
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) >
                        alternative_sample[ivar[1][1]]).astype('int32')
                elif ivar[1][0].endswith('lt'):
                    # elif: with a bare if, the else branch below overwrote the 'gt' result
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) <
                        alternative_sample[ivar[1][1]]).astype('int32')
                else:
                    alternative_sample[ivar[0]] = (
                        (alternative_sample[ivar[1][0]]) *
                        alternative_sample[ivar[1][1]])

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data
            data = data.as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,
                                             coeff,
                                             numalts=SAMPLE_SIZE,
                                             returnprobs=1)
            pdf['segment%s' % name] = pd.Series(probs.flatten(),
                                                index=alts.index)

        new_homes = pd.Series(np.ones(len(movers.index)) * -1,
                              index=movers.index)
        for name, segment in segments:
            name_coeff = str(name)
            name = str(name)
            p = pdf['segment%s' % name]
            mask = np.zeros(len(alts.index), dtype='bool')
            mask = pd.Series(mask, index=alts.index)

            print "Assigning units to %d agents of segment %s" % (len(
                segment.index), name)

            def choose(p,
                       mask,
                       alternatives,
                       segment,
                       new_homes,
                       minsize=None):
                p = copy.copy(p)
                p.loc[mask[mask == True].index] = 0  # already chosen
                try:
                    indexes = np.random.choice(alternatives.index.values,
                                               len(segment.index),
                                               replace=False,
                                               p=p.values / p.values.sum())
                except ValueError:  # np.random.choice raises ValueError when there are fewer available units than agents
                    print "WARNING: not enough options to fit agents, will result in unplaced agents"
                    indexes = np.random.choice(alternatives.index.values,
                                               len(alternatives.index.values),
                                               replace=False,
                                               p=p.values / p.values.sum())

                    if (new_homes.ix[segment[segment.tenure == 2].index.
                                     values[:len(alternatives.index.values)]].
                            shape[0] != 0):
                        new_homes.ix[
                            segment[segment.tenure == 2].index.
                            values[:len(alternatives.index.values)]] = -2
                    else:
                        new_homes.ix[segment.index.values[:len(
                            alternatives.index.values
                        )]] = alternatives.index.values

                    mask.loc[indexes] = True
                    return mask, new_homes

                new_homes.ix[segment.index] = alternatives.loc[
                    indexes].index.values[:len(new_homes.ix[segment.index])]
                mask.loc[indexes] = True

                return mask, new_homes
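            # choose() is weighted sampling without replacement: units already taken
            # are zeroed out via the mask, the remaining weights are renormalized with
            # p / p.sum(), and the returned mask carries forward so later segments
            # cannot claim the same unit.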

            mask, new_homes = choose(p, mask, alts, segment, new_homes)

        build_cnts = new_homes.value_counts()  # num households placed in each building
        print "Assigned %d agents to %d locations with %d unplaced" % (
            new_homes.size, build_cnts.size, build_cnts.get(-1, 0))

        table = dset.households  # need to go back to the whole dataset
        table.loc[new_homes.index, depvar] = new_homes.values.astype('int32')  # .loc avoids chained .ix assignment
        #table.to_sql('tmp_out', engine, if_exists='append')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        choosers.loc[table.index] = table
        #dset.store_attr(output_varname,year,copy.deepcopy(table[depvar]))
        # old_building_count = buildings.shape[0]
        # buildings = buildings.drop(new_homes.index)
        # new_building_count = buildings.shape[0]
        # print '%d units were filled' %(new_building_count - old_building_count)
        #buildings = buildings.drop(new_homes)
        #temp_count += 1
        if (temp_count > 50):  # temp_count is never incremented (see the commented line above), so this break never fires
            break
    #out_table.to_csv('C:/users/jmartinez/documents/households_simulation_test.csv')
    dset.households.loc[out_table.index] = out_table
    def estimate_non_res_elasticity(self,zones):

        dummies = pd.get_dummies(zones.county)
        zones = pd.concat([zones, dummies], axis=1)
        zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean() #use far_x because Xavier's code adds far to buildings
        #zones = zones[zones.non_residential_sqft_zone>0]

        ####spatial weights matrix#####
        #zones = zones.reset_index()
        #zone_coord = zones[['zone_id','zonecentroid_x', 'zonecentroid_y']]

        #zone_coord = zone_coord.as_matrix()

        wqueen = py.queen_from_shapefile(os.path.join(misc.data_dir(),'shapefiles\\zones.shp'))
        #w = py.weights.Distance.DistanceBand(zone_coord, threshold = 50000, binary = False)
        #w.transform ='r'
        #w = py.weights.weights.W(w.neighbors, w.weights)
        w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)
        x = zones[['zonal_emp','residential_units_zone']]
        x = x.apply(np.log1p)
        #x['ln_emp_aggsector_within_5min'] = zones['ln_emp_aggsector_within_5min']
        #x['zone_contains_park'] = zones['zone_contains_park']
        x['percent_younghead'] = zones['percent_younghead']
        x['Arapahoe'] = zones['Arapahoe']
        x['Boulder'] = zones['Boulder']
        x['Broomfield'] = zones['Broomfield']
        x['Clear Creek'] = zones['Clear Creek']
        x['Denver'] = zones['Denver']
        x['Douglas'] = zones['Douglas']
        x['Elbert'] = zones['Elbert']
        x['Gilpin'] = zones['Gilpin']
        x['Jefferson'] = zones['Jefferson']
        x['Weld'] = zones['Weld']
        x=x.fillna(0)
        x = x.as_matrix()

        imat = zones[['ln_avg_unit_price_zone','avg_far']]
        imat = imat.fillna(0)
        imat = imat.as_matrix()

        yend = zones['ln_avg_nonres_unit_price_zone']
        yend = yend.fillna(0)
        yend = yend.as_matrix()
        yend = np.reshape(yend,(zones.shape[0],1))

        y = zones['non_residential_sqft_zone']
        y = y.fillna(0)
        y = y.apply(np.log1p)
        y = y.as_matrix()
        y = np.reshape(y,(zones.shape[0],1))


        imat_names = ['res_price','avg_far']
        x_names = ['zonal_emp', 'residential_units_zone', 'percent_younghead','Arapahoe','Boulder','Broomfield','Clear Creek', 'Denver', 'Douglas','Elbert','Gilpin','Jefferson','Weld']
        yend_name = ['ln_avg_nonres_unit_price_zone']
        y_name = 'non_residential_sqft_zone'

        reg_2sls = py.spreg.twosls_sp.GM_Lag(y, x, yend=yend, q=imat, w=w, w_lags=2,robust ='white', name_x = x_names, name_q = imat_names, name_y = y_name, name_yend = yend_name)
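        # Spatial two-stage least squares: y is log non-residential sqft per zone,
        # the non-residential price (yend) is treated as endogenous and instrumented
        # with residential price and average FAR (q=imat), w supplies queen-contiguity
        # weights, and w_lags=2 adds two orders of spatially lagged exogenous
        # variables as instruments for the lagged dependent variable.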

        #
        # ######estimation
        # x = zones[['zonal_emp','residential_units_zone']]
        # x = x.apply(np.log1p)
        # #x['ln_emp_aggsector_within_5min'] = zones['ln_emp_aggsector_within_5min']
        # #x['zone_contains_park'] = zones['zone_contains_park']
        # x['percent_younghead'] = zones['percent_younghead']
        # x=x.fillna(0)
        # x = x.as_matrix()
        #
        # imat = zones[['ln_avg_unit_price_zone','ln_avg_land_value_per_sqft_zone','median_year_built']]
        # imat = imat.fillna(0)
        # imat = imat.as_matrix()
        #
        # yend = zones['ln_avg_nonres_unit_price_zone']
        # yend = yend.fillna(0)
        # yend = yend.as_matrix()
        # yend = np.reshape(yend,(zones.shape[0],1))
        #
        # y = zones['non_residential_sqft_zone']
        # y = y.fillna(0)
        # y = y.apply(np.log1p)
        # y = y.as_matrix()
        # y = np.reshape(y,(zones.shape[0],1))
        #
        #
        # imat_names = ['res_price','land_value','median_year_built']
        # x_names = ['zonal_emp', 'residential_units_zone', 'percent_younghead']
        # yend_name = ['ln_avg_nonres_unit_price_zone']
        # y_name = 'non_residential_sqft_zone'
        #
        # reg_2sls = py.spreg.twosls_sp.GM_Lag(y, x, yend=yend, q=imat, w=w, robust ='white', name_x = x_names, name_q = imat_names, name_y = y_name, name_yend = yend_name)
        #
        #
        # betas layout: constant + 13 exogenous vars occupy indices 0-13, index 14 is the
        # coefficient on the endogenous price term (the spatial-lag rho comes last), so
        # the demand elasticity proxy is the inverse of its absolute value
        demand_elasticity = np.absolute(reg_2sls.betas[14])
        demand_elasticity = 1/demand_elasticity[0]
        return demand_elasticity
Example #26
0
    #
    # dset.establishments.loc[series.index, "building_id"] = selected_ids
    # dset.establishments.loc[series.index, "zone_id"] = zone_ids

def test_fnc(df, testParam1, testParam2):
    print type(df)
    print type(testParam1)
    print type(testParam2)
    #print type(testParam3)

if __name__ == '__main__':
    from drcog.models import dataset
    from drcog.variables import variable_library
    import os
    import cProfile
    dset = dataset.DRCOGDataset(os.path.join(misc.data_dir(),'drcog.h5'))

    #Load estimated coefficients
    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs.h5'))
    dset.coeffs = coeff_store.coeffs.copy()
    coeff_store.close()

    coeff_store = pd.HDFStore(os.path.join(misc.data_dir(),'coeffs_res.h5'))
    dset.coeffs_res = coeff_store.coeffs_res.copy()
    coeff_store.close()

    variable_library.calculate_variables(dset)

    sim_year = 2011
    alternatives = dset.buildings[(dset.buildings.non_residential_sqft>0)]
    simulate(dset, year=sim_year,depvar = 'building_id',alternatives=alternatives,simulation_table = 'establishments',output_names = ("drcog-coeff-elcm-%s.csv","DRCOG EMPLOYMENT LOCATION CHOICE MODELS (%s)","emp_location_%s","establishment_building_ids"),
Example #27
0
def run(dset, current_year):
    """Refines zone level model results
    """

    b = dset.buildings
    p = dset.parcels
    if p.index.name != 'parcel_id':
        p = p.set_index('parcel_id')

    z = dset.zones


    e = dset.establishments
    hh = dset.households
    zone_refine = pd.read_csv(os.path.join(misc.data_dir(),'zone_demand_refine_no_broomfield.csv'))
    # np.random.shuffle shuffles in place and returns None, so the original assignments
    # stored None (and risked shuffling the live index array); shuffle copies instead
    shuffled_hh_id = hh.index.values.copy()
    np.random.shuffle(shuffled_hh_id)
    shuffled_emp_id = e.index.values.copy()
    np.random.shuffle(shuffled_emp_id)
    
    def relocate_agents(agents_joined,zone_id,number_of_agents):
        agent_pool = agents_joined[agents_joined.zone_id!=zone_id]
        #shuffled_ids = agent_pool.index.values
        #np.random.shuffle(shuffled_ids)
        #agents_to_relocate = shuffled_ids[:number_of_agents]
        #idx_agents_to_relocate = np.in1d(dset.households.index.values,agents_to_relocate)

        random_sample = random.sample(agent_pool.index, int(number_of_agents))  # int() guards against float shift counts from the refine table

        # new_building_id = b[b.zone_id==zone_id].index.values[0]
        # dset.households.building_id[idx_agents_to_relocate] = new_building_id
        #try:
        new_building_id = b[b.zone_id==zone_id].index.values[0]
        agents_joined.loc[random_sample, "building_id"] = new_building_id
        # except:
        #     print 'No buildings in specified zone.'
        #     if zone_id not in dset.parcels.zone_id.values:
        #         county = z.county.values[z.index.values==zone_id][0]
        #         x = z.zonecentroid_x.values[z.index.values==zone_id][0]
        #         y = z.zonecentroid_y.values[z.index.values==zone_id][0]
        #         if county == 'Denver':
        #             county_id = 8031
        #         elif county == 'Adams':
        #             county_id = 8001
        #         elif county == 'Arapahoe':
        #             county_id = 8005
        #         elif county == 'Boulder':
        #             county_id = 8013
        #         elif county == 'Broomfield':
        #             county_id = 8014
        #         elif county == 'Clear Creek':
        #             county_id = 8019
        #         elif county == 'Douglas':
        #             county_id = 8035
        #         elif county == 'Elbert':
        #             county_id = 8039
        #         elif county == 'Gilpin':
        #             county_id = 8047
        #         elif county == 'Jefferson':
        #             county_id = 8059
        #         elif county == 'Weld':
        #             county_id = 8123
        #         pid = p.index.values.max()+1
        #         newparcel = pd.DataFrame({'county_id':[county_id],'parcel_sqft':[43560],'land_value':[0],'zone_id':[zone_id],
        #                                      'centroid_x':[x],'centroid_y':[y],'x':[x],'y':[y],'dist_bus':[6000],'dist_rail':[6000],'in_ugb':[1],'in_uga':[0],
        #                                      'prop_constrained':[0.0],'acres':[1.0] })
        #         newparcel.index = np.array([pid])
        #         dset.d['parcels'] = pd.concat([p,newparcel])
        #         dset.parcels.index.name = 'parcel_id'
        #     else:
        #         pid = p.index.values[p.zone_id==zone_id][0]
        #     print 'Constructing small structure to place agents'
        #     new_building_id = dset.buildings.index.values.max() + 1
        #     newbuildings = pd.DataFrame({'building_type_id':[20],'improvement_value':[10000],'land_area':[200],'non_residential_sqft':[0],
        #                                  'parcel_id':[pid],'residential_units':[2],'sqft_per_unit':[250],'stories':[0],'tax_exempt':[0],'year_built':[2000],'bldg_sq_ft':[500],
        #                                  'unit_price_non_residential':[0.0],'unit_price_residential':[5000.0], 'building_sqft_per_job':[0.0],
        #                                  'non_residential_units':[0],'base_year_jobs':[0.0],'all_units':[2]})
        #     newbuildings.index = np.array([new_building_id])
        #     dset.d['buildings'] = pd.concat([dset.buildings,newbuildings])
        #     dset.buildings.index.name = 'building_id'
        #     agents_joined.building_id[idx_agents_to_relocate] = new_building_id

    def unplace_agents(agents_joined,zone_id,number_of_agents):
        number_of_agents = -number_of_agents #flip the sign
        agent_pool = agents_joined[agents_joined.zone_id==zone_id] ##Notice the equality instead of disequality
        if len(agent_pool) > number_of_agents:
            #shuffled_ids = agent_pool.index.values
            #np.random.shuffle(shuffled_ids)
            #agents_to_relocate = shuffled_ids[:number_of_agents]
            #idx_agents_to_relocate = np.in1d(dset.households.index.values,agents_to_relocate)
            random_sample = random.sample(agent_pool.index, int(number_of_agents))  # int() guards against float counts
            dset.households.loc[random_sample, "building_id"] = -1  # unplace; .loc avoids chained assignment

    def relocate_estabs(agents_joined,zone_id,number_of_agents):
        agent_pool = agents_joined[(agents_joined.zone_id!=zone_id)]
        e_sample = agent_pool.reindex(np.random.permutation(agent_pool.index))
        e_to_move = e_sample[np.cumsum(e_sample['employees'].values)<abs(number_of_agents+10)]
        shuffled_ids = e_to_move.index.values
        #np.random.shuffle(shuffled_ids)
        agents_to_relocate = shuffled_ids
        idx_agents_to_relocate = np.in1d(dset.establishments.index.values,agents_to_relocate)
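        # The cumsum trick above moves establishments until their cumulative employee
        # count reaches the target plus a 10-job slack: employee counts [5, 3, 8] with
        # a target of 10 give cumsum [5, 8, 16], all below 10 + 10 = 20, so all three
        # establishments are selected.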

        # new_building_id = b[b.zone_id==zone_id].index.values[0]
        # dset.establishments.building_id[idx_agents_to_relocate] = new_building_id
        #try:
        new_building_id = b[b.zone_id==zone_id].index.values[0]
        agents_joined.loc[idx_agents_to_relocate, "building_id"] = new_building_id  # corrected chain index error
        # except:
        #     print 'No buildings in specified zone.'
        #     if zone_id not in dset.parcels.zone_id.values:
        #         county = z.county.values[z.index.values==zone_id][0]
        #         x = z.zonecentroid_x.values[z.index.values==zone_id][0]
        #         y = z.zonecentroid_y.values[z.index.values==zone_id][0]
        #         if county == 'Denver':
        #             county_id = 8031
        #         elif county == 'Adams':
        #             county_id = 8001
        #         elif county == 'Arapahoe':
        #             county_id = 8005
        #         elif county == 'Boulder':
        #             county_id = 8013
        #         elif county == 'Broomfield':
        #             county_id = 8014
        #         elif county == 'Clear Creek':
        #             county_id = 8019
        #         elif county == 'Douglas':
        #             county_id = 8035
        #         elif county == 'Elbert':
        #             county_id = 8039
        #         elif county == 'Gilpin':
        #             county_id = 8047
        #         elif county == 'Jefferson':
        #             county_id = 8059
        #         elif county == 'Weld':
        #             county_id = 8123
        #         pid = p.index.values.max()+1
        #         newparcel = pd.DataFrame({'county_id':[county_id],'parcel_sqft':[43560],'land_value':[0],'zone_id':[zone_id],
        #                                      'centroid_x':[x],'centroid_y':[y],'x':[x],'y':[y],'dist_bus':[6000],'dist_rail':[6000],'in_ugb':[1],'in_uga':[0],
        #                                      'prop_constrained':[0.0],'acres':[1.0] })
        #         newparcel.index = np.array([pid])
        #         dset.d['parcels'] = pd.concat([p,newparcel])
        #         dset.parcels.index.name = 'parcel_id'
        #     else:
        #         pid = p.index.values[p.zone_id==zone_id][0]
        #     print 'Constructing small structure to place agents'
        #     new_building_id = dset.buildings.index.values.max() + 1
        #     newbuildings = pd.DataFrame({'building_type_id':[4],'improvement_value':[10000],'land_area':[200],'non_residential_sqft':[500],
        #                                  'parcel_id':[pid],'residential_units':[0],'sqft_per_unit':[0],'stories':[0],'tax_exempt':[0],'year_built':[2000],'bldg_sq_ft':[500],
        #                                  'unit_price_non_residential':[2.0],'unit_price_residential':[0.0], 'building_sqft_per_job':[250.0],
        #                                  'non_residential_units':[2],'base_year_jobs':[0.0],'all_units':[2]})
        #     newbuildings.index = np.array([new_building_id])
        #     dset.d['buildings'] = pd.concat([dset.buildings,newbuildings])
        #     dset.buildings.index.name = 'building_id'
        #     agents_joined.loc[idx_agents_to_relocate, "building_id"] = new_building_id  # corrected chain index error

    def unplace_estabs(agents_joined,zone_id,number_of_agents):
        number_of_agents = -number_of_agents #flip the sign
        agent_pool = agents_joined[agents_joined.zone_id==zone_id] ##Notice the equality instead of disequality
        if agent_pool.employees.sum() > number_of_agents:
            e_sample = agent_pool.reindex(np.random.permutation(agent_pool.index))
            e_to_move = e_sample[np.cumsum(e_sample['employees'].values)<abs(number_of_agents)]
            shuffled_ids = e_to_move.index.values
            np.random.shuffle(shuffled_ids)
            agents_to_relocate = shuffled_ids
            idx_agents_to_relocate = np.in1d(dset.establishments.index.values,agents_to_relocate)
            dset.establishments.loc[idx_agents_to_relocate, "building_id"] = -1  # unplace; .loc avoids chained assignment

    # for zone in zone_refine.zone_id.values:
    #     idx_zone = (zone_refine.zone_id==zone)
    #     hh_shift = zone_refine.annual_hh_shift[idx_zone].values[0]
    #     emp_shift = zone_refine.annual_emp_shift[idx_zone].values[0]
    #     if hh_shift > 0:
    #         relocate_agents(hh,zone,hh_shift)
    #     if emp_shift > 0:
    #         relocate_estabs(e,zone,emp_shift)
    #     if current_year < 2040:
    #         if hh_shift < 0:
    #             unplace_agents(hh,zone,hh_shift)
    #         if emp_shift < 0:
    #             unplace_agents(e,zone,emp_shift)
    def refine(series):
        hh_shift = series.annual_hh_shift
        emp_shift = series.annual_emp_shift
        zone = series.zone_id
        if hh_shift > 0:
            relocate_agents(hh,zone,hh_shift)
        if emp_shift > 0:
            relocate_estabs(e,zone,emp_shift)
        if current_year < 2040:
            if hh_shift < 0:
                unplace_agents(hh,zone,hh_shift)
            if emp_shift < 0:
                unplace_estabs(e,zone,emp_shift)



    zone_refine.apply(refine, axis=1)
Example #28
0
    def estimate_non_res_elasticity(self, zones):

        dummies = pd.get_dummies(zones.county)
        zones = pd.concat([zones, dummies], axis=1)
        zones['avg_far'] = self.buildings_far.groupby('zone_id').far.mean(
        )  #use far_x because Xavier's code adds far to buildings
        #zones = zones[zones.non_residential_sqft_zone>0]

        ####spatial weights matrix#####
        #zones = zones.reset_index()
        #zone_coord = zones[['zone_id','zonecentroid_x', 'zonecentroid_y']]

        #zone_coord = zone_coord.as_matrix()

        wqueen = py.queen_from_shapefile(
            os.path.join(misc.data_dir(), 'shapefiles\\zones.shp'))
        #w = py.weights.Distance.DistanceBand(zone_coord, threshold = 50000, binary = False)
        #w.transform ='r'
        #w = py.weights.weights.W(w.neighbors, w.weights)
        w = py.weights.weights.W(wqueen.neighbors, wqueen.weights)
        x = zones[['zonal_emp', 'residential_units_zone']]
        x = x.apply(np.log1p)
        #x['ln_emp_aggsector_within_5min'] = zones['ln_emp_aggsector_within_5min']
        #x['zone_contains_park'] = zones['zone_contains_park']
        x['percent_younghead'] = zones['percent_younghead']
        x['Arapahoe'] = zones['Arapahoe']
        x['Boulder'] = zones['Boulder']
        x['Broomfield'] = zones['Broomfield']
        x['Clear Creek'] = zones['Clear Creek']
        x['Denver'] = zones['Denver']
        x['Douglas'] = zones['Douglas']
        x['Elbert'] = zones['Elbert']
        x['Gilpin'] = zones['Gilpin']
        x['Jefferson'] = zones['Jefferson']
        x['Weld'] = zones['Weld']
        x = x.fillna(0)
        x = x.as_matrix()

        imat = zones[['ln_avg_unit_price_zone', 'avg_far']]
        imat = imat.fillna(0)
        imat = imat.as_matrix()

        yend = zones['ln_avg_nonres_unit_price_zone']
        yend = yend.fillna(0)
        yend = yend.as_matrix()
        yend = np.reshape(yend, (zones.shape[0], 1))

        y = zones['non_residential_sqft_zone']
        y = y.fillna(0)
        y = y.apply(np.log1p)
        y = y.as_matrix()
        y = np.reshape(y, (zones.shape[0], 1))

        imat_names = ['res_price', 'avg_far']
        x_names = [
            'zonal_emp', 'residential_units_zone', 'percent_younghead',
            'Arapahoe', 'Boulder', 'Broomfield', 'Clear Creek', 'Denver',
            'Douglas', 'Elbert', 'Gilpin', 'Jefferson', 'Weld'
        ]
        yend_name = ['ln_avg_nonres_unit_price_zone']
        y_name = 'non_residential_sqft_zone'

        reg_2sls = py.spreg.twosls_sp.GM_Lag(y,
                                             x,
                                             yend=yend,
                                             q=imat,
                                             w=w,
                                             w_lags=2,
                                             robust='white',
                                             name_x=x_names,
                                             name_q=imat_names,
                                             name_y=y_name,
                                             name_yend=yend_name)

        #
        # ######estimation
        # x = zones[['zonal_emp','residential_units_zone']]
        # x = x.apply(np.log1p)
        # #x['ln_emp_aggsector_within_5min'] = zones['ln_emp_aggsector_within_5min']
        # #x['zone_contains_park'] = zones['zone_contains_park']
        # x['percent_younghead'] = zones['percent_younghead']
        # x=x.fillna(0)
        # x = x.as_matrix()
        #
        # imat = zones[['ln_avg_unit_price_zone','ln_avg_land_value_per_sqft_zone','median_year_built']]
        # imat = imat.fillna(0)
        # imat = imat.as_matrix()
        #
        # yend = zones['ln_avg_nonres_unit_price_zone']
        # yend = yend.fillna(0)
        # yend = yend.as_matrix()
        # yend = np.reshape(yend,(zones.shape[0],1))
        #
        # y = zones['non_residential_sqft_zone']
        # y = y.fillna(0)
        # y = y.apply(np.log1p)
        # y = y.as_matrix()
        # y = np.reshape(y,(zones.shape[0],1))
        #
        #
        # imat_names = ['res_price','land_value','median_year_built']
        # x_names = ['zonal_emp', 'residential_units_zone', 'percent_younghead']
        # yend_name = ['ln_avg_nonres_unit_price_zone']
        # y_name = 'non_residential_sqft_zone'
        #
        # reg_2sls = py.spreg.twosls_sp.GM_Lag(y, x, yend=yend, q=imat, w=w, robust ='white', name_x = x_names, name_q = imat_names, name_y = y_name, name_yend = yend_name)
        #
        #
        demand_elasticity = np.absolute(reg_2sls.betas[14])
        demand_elasticity = 1 / demand_elasticity[0]
        #
        return demand_elasticity
def simulate(dset,year,depvar = 'building_id',alternatives=None,simulation_table = None,
              output_names=None,agents_groupby = ['income_3_tenure',],transition_config=None,relocation_config=None):


    output_csv, output_title, coeff_name, output_varname = output_names

    if transition_config['Enabled']:
        ct = dset.fetch(transition_config['control_totals_table'])
        if 'persons' in ct.columns:
            del ct['persons']
        ct["total_number_of_households"] = (ct["total_number_of_households"]*transition_config['scaling_factor']).astype('int32')
        hh = dset.fetch('households')
        persons = dset.fetch('persons')
        tran = transition.TabularTotalsTransition(ct, 'total_number_of_households')
        model = transition.TransitionModel(tran)
        #import pdb; pdb.set_trace()
        new, added, new_linked = model.transition(
                hh, year, linked_tables={'linked': (persons, 'household_id')})
        new.loc[added,'building_id'] = -1
        dset.d['households'] = new
        dset.d['persons'] = new_linked['linked']




    #calculate mortgage payment values

    temp_count = 0

    buildings = alternatives
    out_table = pd.DataFrame(columns=dset.households.columns)
    homeless = pd.DataFrame(columns=dset.households.columns)
    r = .05/12
    n = 360
    buildings['est_mortgage_payment']=buildings.unit_price_residential*((r*(1+r)**n)/((1+r)**n-1))

    dset.households.index.name = 'household_id'
    choosers = dset.fetch(simulation_table)

    if relocation_config['Enabled']:
        rate_table = dset.store[relocation_config['relocation_rates_table']].copy()
        rate_field = "probability_of_relocating"
        rate_table[rate_field] = rate_table[rate_field]*.01*relocation_config['scaling_factor']
        movers = dset.relocation_rates(choosers,rate_table,rate_field)
        choosers.loc[movers, depvar] = -1  # .loc avoids chained .ix assignment

    movers_all = choosers[choosers[depvar]==-1]
    county_growth_share = pd.read_csv(os.path.join(misc.data_dir(),'county_growth_share.csv'), index_col=0 )
    counties = county_growth_share.columns.values
    current_growth_shares = county_growth_share.loc[year].values
    movers_counties = np.random.choice(counties, movers_all.shape[0], replace=True, p=current_growth_shares)

    movers_all['county_id'] = movers_counties


    income_segment = movers_all.groupby('income_grp')[['upper_income_grp_val','lower_income_grp_val']].agg([np.mean, np.size])
    # get county growth control data and cross it with the income segments

    income_segment['county'] = county_growth_share.loc[year].index.values[0]
    income_segment['growth_share'] = county_growth_share.loc[year][0]
    copy_df = income_segment.copy()
    for i in county_growth_share.loc[year][1:].iteritems():

        copy_df['county'] = i[0]
        copy_df['growth_share'] = i[1]
        income_segment = pd.concat([income_segment, copy_df])

    income_segment = income_segment.set_index(['county', income_segment.index])
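    # After set_index, each seg from iterrows() below unpacks as:
    #   seg[0] = (county, income_grp); seg[1][0] = mean upper income bound,
    #   seg[1][1] = segment size, seg[1][2] = mean lower income bound,
    #   seg[1][3] = size again, seg[1][4] = growth_share.
    # The positional seg[1][k] lookups in the loop rely on this layout.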

    print "Total new agents and movers = %d" % len(movers_all.index)



    for seg in income_segment.iterrows():


        movers = movers_all[(movers_all['income']<= seg[1][0]) & (movers_all['income']>= seg[1][2])]
        print 'County: %s. Placing %d households in the income range (%d, %d)' % (seg[0][0],seg[1][1],seg[1][2], seg[1][0])

        # vacant units = capacity minus households currently assigned to each building
        empty_units = buildings.residential_units.sub(choosers[choosers['building_id']!=-1].groupby('building_id').size(),fill_value=0)
        empty_units = empty_units[empty_units>0].order(ascending=False)
        print 'number of empty units is %d' % empty_units.sum()
        # expand to one row per vacant unit, restricted to the segment's county
        alternatives = buildings.ix[np.repeat(empty_units.index.values,empty_units.values.astype('int'))]
        alternatives = alternatives[alternatives.county_id == int(seg[0][0])]

        if((seg[1][2]/12) <= 0):
            # no reported income: restrict to units priced below a fixed cutoff
            alts = alternatives[alternatives['unit_price_residential'] < 186281]
        elif((seg[1][2]/12) >= 55000):
            # very high monthly income: restrict to the top of the price range
            alts = alternatives[alternatives['unit_price_residential'] > 1583400]
        else:
            # affordability rule: mortgage payment at most 33% of monthly income
            alts = alternatives[alternatives['est_mortgage_payment'] / (seg[1][2]/12) <= 0.33]
        if(alts.shape[0] == 0):
            homeless = pd.concat([movers, homeless])  # this segment's movers cannot be placed
            print 'Could not place %d households due to income restrictions' % seg[1][1]
            continue




        pdf = pd.DataFrame(index=alts.index)

        segments = movers.groupby(agents_groupby)

        ##simulation
        for name, segment in segments:
            # probabilities depend only on the alternatives, so score a single
            # representative chooser per segment
            segment = segment.head(1)
            name = str(name)
            tmp_outcsv, tmp_outtitle, tmp_coeffname = output_csv%name, output_title%name, coeff_name%name
            ind_vars = dset.coeffs[(tmp_coeffname, 'fnames')][np.invert(dset.coeffs[(tmp_coeffname, 'fnames')].isnull().values)].values.tolist()
            SAMPLE_SIZE = alts.index.size
            numchoosers = segment.shape[0]
            numalts = alts.shape[0]
            sample = np.tile(alts.index.values,numchoosers)
            alts_sample = alts
            alts_sample['join_index'] = np.repeat(segment.index.values,SAMPLE_SIZE)
            alts_sample = pd.merge(alts_sample,segment,left_on='join_index',right_index=True,suffixes=('','_r'))
            chosen = np.zeros((numchoosers,SAMPLE_SIZE))
            chosen[:,0] = 1
            sample, alternative_sample, est_params = sample, alts_sample, ('mnl',chosen)
            ##Interaction variables
            interaction_vars = [(var, var.split('_x_')) for var in ind_vars if '_x_' in var]
            for ivar in interaction_vars:
                if ivar[1][0].endswith('gt'):
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])>alternative_sample[ivar[1][1]]).astype('int32')
                elif ivar[1][0].endswith('lt'):
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])<alternative_sample[ivar[1][1]]).astype('int32')
                else:
                    alternative_sample[ivar[0]] = ((alternative_sample[ivar[1][0]])*alternative_sample[ivar[1][1]])
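            # Convention: a variable named 'a_x_b' is materialized as a*b,
            # unless the left side ends in 'gt'/'lt', in which case it becomes
            # a 0/1 indicator of the comparison a > b (or a < b).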

            est_data = pd.DataFrame(index=alternative_sample.index)
            for varname in ind_vars:
                est_data[varname] = alternative_sample[varname]
            est_data = est_data.fillna(0)
            data = est_data
            data = data.as_matrix()
            coeff = dset.load_coeff(tmp_coeffname)
            probs = interaction.mnl_simulate(data,coeff,numalts=SAMPLE_SIZE,returnprobs=1)
            pdf['segment%s'%name] = pd.Series(probs.flatten(),index=alts.index)
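            # mnl_simulate returns softmax probabilities over the alternatives,
            # P(j) = exp(X_j . coeff) / sum_k exp(X_k . coeff), flattened here
            # into one probability per vacant unit for this segment.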

        new_homes = pd.Series(np.ones(len(movers.index))*-1,index=movers.index)
        for name, segment in segments:
            name = str(name)
            p=pdf['segment%s'%name]
            mask = np.zeros(len(alts.index),dtype='bool')
            mask = pd.Series(mask, index=alts.index)

            print "Assigning units to %d agents of segment %s" % (len(segment.index),name)

            def choose(p,mask,alternatives,segment,new_homes,minsize=None):
                p = copy.copy(p)
                p.loc[mask[mask==True].index] = 0 # zero out alternatives already chosen
                try:
                    indexes = np.random.choice(alternatives.index.values,len(segment.index),replace=False,p=p.values/p.values.sum())
                except ValueError:
                    # np.random.choice raises ValueError when there are fewer
                    # positive-probability alternatives than agents to place
                    print "WARNING: not enough options to fit agents, will result in unplaced agents"
                    indexes = np.random.choice(alternatives.index.values,len(alternatives.index.values),replace=False,p=p.values/p.values.sum())

                    if(new_homes.ix[segment[segment.tenure==2].index.values[:len(alternatives.index.values)]].shape[0] != 0):
                        # unplaceable renters are flagged with -2
                        new_homes.ix[segment[segment.tenure==2].index.values[:len(alternatives.index.values)]] = -2
                    else:
                        new_homes.ix[segment.index.values[:len(alternatives.index.values)]] = alternatives.index.values

                    mask.loc[indexes] = True
                    return mask,new_homes

                new_homes.ix[segment.index] = alternatives.loc[indexes].index.values[:len(new_homes.ix[segment.index])]
                mask.loc[indexes] = True

                return mask,new_homes
            mask,new_homes = choose(p,mask,alts,segment,new_homes)

        build_cnts = new_homes.value_counts()  # number of households placed in each building
        print "Assigned %d agents to %d locations with %d unplaced" % (new_homes.size,build_cnts.size,build_cnts.get(-1,0))

        table = dset.households # need to go back to the whole dataset
        table.loc[new_homes.index, depvar] = new_homes.values.astype('int32')
        #table.to_sql('tmp_out', engine, if_exists='append')
        table = table.ix[new_homes.index]
        out_table = pd.concat([table, out_table])
        choosers.loc[table.index] = table
        #dset.store_attr(output_varname,year,copy.deepcopy(table[depvar]))
        # old_building_count = buildings.shape[0]
        # buildings = buildings.drop(new_homes.index)
        # new_building_count = buildings.shape[0]
        # print '%d units were filled' %(new_building_count - old_building_count)
        #buildings = buildings.drop(new_homes)
        #temp_count += 1
        if temp_count > 50:  # inert unless the increment above is re-enabled
            break
    #out_table.to_csv('C:/users/jmartinez/documents/households_simulation_test.csv')
    dset.households.loc[out_table.index] = out_table
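
# The placement loop above reduces to one pattern: score each vacant unit, turn
# the scores into MNL (softmax) probabilities, then draw units without
# replacement so no two households land in the same unit. A minimal sketch of
# that pattern on made-up data (all names here are illustrative):
import numpy as np
import pandas as pd

units = pd.Series([0.2, 1.5, 0.7, 2.1], index=[101, 102, 103, 104])  # utilities of vacant units
probs = np.exp(units) / np.exp(units).sum()    # softmax choice probabilities
movers = ['hh1', 'hh2', 'hh3']
chosen = np.random.choice(units.index.values, size=len(movers), replace=False, p=probs.values)
print pd.Series(chosen, index=movers)          # household -> building assignment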
Example #30
0
def add_rows(data, nrows, starting_index=None):
    """
    Add `nrows` rows to a data table by copying existing rows.
    New rows receive fresh sequential IDs.

    Parameters
    ----------
    data : pandas.DataFrame
    nrows : int
        Number of rows to add.
    starting_index : int, optional
        The starting index from which to calculate indexes for the new
        rows. If not given, the max + 1 of the index of `data` is used.

    Returns
    -------
    updated : pandas.DataFrame
        Table with rows added. New rows have index values starting at
        `starting_index`.
    added : pandas.Index
        New indexes of the rows that were added.
    copied : pandas.Index
        Indexes of rows that were copied. A row copied multiple times
        will have multiple entries.

    """
    if nrows == 0:
        # _empty_index is a module helper that returns an empty pd.Index
        return data, _empty_index(), _empty_index()

    if starting_index is None:
        starting_index = data.index.values.max() + 1

    # Added: resample ages to match the State Demographer's net migration data

    # import migration data
    migration = pd.read_csv(os.path.join(misc.data_dir(), "NetMigrationByAge.csv"))
    migration.columns = ["county", "age", "net_migration"]
    migration = migration[15:90]  # keep only ages present in the households table
    migration["prob_age"] = migration["net_migration"] / migration.net_migration.sum()  # normalize into sampling weights

    random_ages = np.random.choice(
        migration.age, nrows, p=migration.prob_age
    )  # randomly draw ages from the weighted pdf

    frame = pd.DataFrame()
    frame["ages"] = random_ages
    grp = frame.groupby(
        "ages"
    ).size()  # count how many households of each age were drawn above

    agg_list = []
    for i in grp.iteritems():
        age_val = i[0]
        age_count = i[1]
        # draw donor households whose head matches the sampled age (with replacement)
        array = np.random.choice(data[data.age_of_head == age_val].index.values, age_count)
        for j in array:
            agg_list.append(j)

    # original code:
    # i_to_copy = np.random.choice(data.index.values, nrows)  # randomly chooses household indexes to copy; could be improved by weighting picks toward likely new-household characteristics
    new_rows = data.loc[agg_list].copy()  # new dataframe of copied (donor) households
    added_index = pd.Index(np.arange(starting_index, starting_index + nrows, dtype=np.int))
    new_rows.index = added_index  # give the copies fresh sequential ids


    return pd.concat([data, new_rows]), added_index, pd.Index(agg_list)
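
# The two-stage draw above (sample an age from the migration weights, then copy
# a donor household with that age_of_head) can be exercised on its own. A small
# self-contained sketch with made-up weights:
import numpy as np
import pandas as pd

hh = pd.DataFrame({'age_of_head': [25, 25, 40, 40, 40, 60]},
                  index=[10, 11, 12, 13, 14, 15])
ages = np.array([25, 40, 60])
weights = np.array([0.5, 0.3, 0.2])                # must sum to 1
drawn = np.random.choice(ages, 4, p=weights)       # stage 1: weighted ages
donors = [np.random.choice(hh[hh.age_of_head == a].index.values) for a in drawn]
print hh.loc[donors]                               # stage 2: copied households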
Example #31
0
def calculate_variables(dset):

    ##PARCEL VARIABLES

    # XG: Fix the mismatch between zone and county
    p = dset.parcels
    del p['county_id']
    zone_county = pd.read_csv(os.path.join(misc.data_dir(), 'TAZ_County_Table.csv'))
    zone_county = zone_county.set_index('zone_id')
    zone_county = zone_county[['county_id']]
    p = pd.merge(p, zone_county, left_on='zone_id', right_index=True)
    pu = p  # keep the unindexed frame so it can be restored at the end
    #end of fix

    if p.index.name != 'parcel_id':
        p = p.set_index('parcel_id')

    print p[p.zone_id == 1725].x

    p['in_denver'] = (p.county_id == 8031).astype('int32')
    p['ln_dist_rail'] = p.dist_rail.apply(np.log1p)
    p['ln_dist_bus'] = p.dist_bus.apply(np.log1p)
    p['ln_land_value'] = p.land_value.apply(np.log1p)
    p['land_value_per_sqft'] = p.land_value * 1.0 / p.parcel_sqft
    p['rail_within_mile'] = (p.dist_rail < 5280).astype('int32')
    p['cherry_creek_school_district'] = (
        p.school_district == 8).astype('int32')
    p['acres'] = p.parcel_sqft / 43560.0
    p['ln_acres'] = (p.parcel_sqft / 43560.0).apply(np.log1p)

    #BUILDING VARIABLES
    # elcm_configuration is expected to be defined at module scope
    b = dset.fetch('buildings',
                   building_sqft_per_job_table=elcm_configuration[
                       'building_sqft_per_job_table'],
                   bsqft_job_scaling=elcm_configuration['scaling_factor'])
    b = b[[
        'building_type_id', 'improvement_value', 'land_area',
        'non_residential_sqft', 'parcel_id', 'residential_units',
        'sqft_per_unit', 'stories', 'tax_exempt', 'year_built', 'bldg_sq_ft',
        'unit_price_non_residential', 'unit_price_residential'
    ]]
    b.loc[:, 'zone_id'] = p.zone_id[b.parcel_id].values

    bsqft_job = dset.building_sqft_per_job
    b = pd.merge(b,
                 bsqft_job,
                 left_on=['zone_id', 'building_type_id'],
                 right_index=True,
                 how='left')
    b["non_residential_units"] = b.non_residential_sqft / b.building_sqft_per_job  #####
    b["base_year_jobs"] = dset.establishments.groupby(
        'building_id').employees.sum()
    # things get all screwed up if you have overfull buildings
    b["non_residential_units"] = b[["non_residential_units",
                                    "base_year_jobs"]].max(axis=1)
    b["all_units"] = b.residential_units + b.non_residential_units

    b['county_id'] = p.county_id[b.parcel_id].values
    b['townhome'] = (b.building_type_id == 24).astype('int32')
    b['multifamily'] = (np.in1d(b.building_type_id, [2, 3])).astype('int32')
    b['office'] = (b.building_type_id == 5).astype('int32')
    b['retail_or_restaurant'] = (np.in1d(b.building_type_id,
                                         [17, 18])).astype('int32')
    b['industrial_building'] = (np.in1d(b.building_type_id,
                                        [9, 22])).astype('int32')
    b['residential_sqft'] = (b.bldg_sq_ft - b.non_residential_sqft)
    # collapse building types for the HLCM: 1 -> type 2, 2 -> type 3,
    # 3 -> type 20, 4 -> everything else
    b['btype_hlcm'] = 1 * (b.building_type_id == 2) + 2 * (b.building_type_id == 3) + 3 * (
        b.building_type_id == 20) + 4 * np.invert(np.in1d(b.building_type_id, [2, 3, 20]))
    b['county8001'] = (b.county_id == 8001).astype('int32')
    b['county8005'] = (b.county_id == 8005).astype('int32')
    b['county8013'] = (b.county_id == 8013).astype('int32')
    b['county8014'] = (b.county_id == 8014).astype('int32')
    b['county8019'] = (b.county_id == 8019).astype('int32')
    b['county8031'] = (b.county_id == 8031).astype('int32')
    b['county8035'] = (b.county_id == 8035).astype('int32')
    b['county8039'] = (b.county_id == 8039).astype('int32')
    b['county8047'] = (b.county_id == 8047).astype('int32')
    b['county8059'] = (b.county_id == 8059).astype('int32')
    b['county8123'] = (b.county_id == 8123).astype('int32')
    b['unit_price_res_sqft'] = b[
        b.residential_units > 0].unit_price_residential / b[
            b.residential_units > 0].bldg_sq_ft
    p['nonres_far'] = (b.groupby('parcel_id').non_residential_sqft.sum() /
                       p.acres).apply(np.log1p)
    p['ln_units_per_acre'] = (b.groupby('parcel_id').residential_units.sum() /
                              p.acres).apply(np.log1p)

    #HOUSEHOLD VARIABLES
    hh_estim = dset.fetch('households_for_estimation')
    hh_estim['tenure'] = 1
    hh_estim.loc[hh_estim.own > 1, "tenure"] = 2
    # map income_group categories to representative dollar incomes
    income_map = {1: 7500, 2: 17500, 3: 25000, 4: 35000, 5: 45000, 6: 55000,
                  7: 67500, 8: 87500, 9: 117500, 10: 142500, 11: 200000}
    hh_estim['income'] = hh_estim.income_group.map(income_map).fillna(0)

    hh = dset.fetch('households')
    for table in [hh_estim, hh]:
        choosers = table
        choosers['zone_id'] = b.zone_id[choosers.building_id].values
        choosers['building_type_id'] = b.building_type_id[
            choosers.building_id].values
        choosers['county_id'] = b.county_id[choosers.building_id].values
        choosers['btype'] = 1 * (choosers.building_type_id == 2) + 2 * (
            choosers.building_type_id
            == 3) + 3 * (choosers.building_type_id == 20) + 4 * np.invert(
                np.in1d(choosers.building_type_id, [2, 3, 20]))
        # 1: owner < $60k, 2: owner $60-120k, 3: owner >= $120k,
        # 4: renter < $40k, 5: renter >= $40k
        choosers['income_3_tenure'] = 1 * (choosers.income < 60000) * (
            choosers.tenure == 1) + 2 * np.logical_and(
                choosers.income >= 60000, choosers.income < 120000
            ) * (choosers.tenure == 1) + 3 * (choosers.income >= 120000) * (
                choosers.tenure == 1) + 4 * (choosers.income < 40000) * (
                    choosers.tenure == 2) + 5 * (choosers.income >= 40000) * (
                        choosers.tenure == 2)
        choosers['younghead'] = choosers.age_of_head < 30
        choosers['hh_with_child'] = choosers.children > 0
        choosers['ln_income'] = choosers.income.apply(np.log1p)
        choosers['income5xlt'] = choosers.income * 5.0
        choosers['income10xlt'] = choosers.income * 10.0
        choosers['wkrs_hhs'] = choosers.workers * 1.0 / choosers.persons

    #ESTABLISHMENT VARIABLES
    e = dset.fetch('establishments')

    e['zone_id'] = b.zone_id[e.building_id].values

    e['county_id'] = b.county_id[e.building_id].values
    e['sector_id_six'] = 1 * (e.sector_id == 61) + 2 * (
        e.sector_id == 71) + 3 * np.in1d(e.sector_id, [
            11, 21, 22, 23, 31, 32, 33, 42, 48, 49
        ]) + 4 * np.in1d(e.sector_id, [7221, 7222, 7224]) + 5 * np.in1d(
            e.sector_id, [44, 45, 7211, 7212, 7213, 7223]) + 6 * np.in1d(
                e.sector_id, [51, 52, 53, 54, 55, 56, 62, 81, 92])
    e['sector_id_retail_agg'] = e.sector_id * np.logical_not(
        np.in1d(e.sector_id, [7211, 7212, 7213])) + 7211 * np.in1d(
            e.sector_id, [7211, 7212, 7213])
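    # The codings above follow NAICS prefixes (roughly: 61 education, 71
    # arts/recreation, 7221/7222/7224 eating and drinking places, 44/45 retail,
    # 721x accommodation, 51-56/62/81/92 services and government);
    # sector_id_retail_agg additionally folds 7212/7213 into 7211.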
    e['nonres_sqft'] = b.non_residential_sqft[e.building_id].values

    #ZONE VARIABLES

    #XG: fix the mismatch zone county
    z = dset.fetch('zones')
    del z['county']
    z['zone_id'] = z.index
    zone_county = pd.read_csv(
        os.path.join(misc.data_dir(), 'TAZ_County_Table.csv'))
    zone_county = zone_county.set_index('zone_id')
    zone_county = zone_county[['county']]
    z = pd.merge(z, zone_county, left_on='zone_id', right_index=True)
    del z['zone_id']
    zu = z
    #end of fix

    z['zonal_hh'] = hh.groupby('zone_id').size()
    z['zonal_emp'] = e.groupby('zone_id').employees.sum()

    print z.columns
    print z['zonal_emp'].loc[1722]  # spot check a single zone

    z['residential_sqft_zone'] = b.groupby('zone_id').residential_sqft.sum()
    z['zonal_pop'] = hh.groupby('zone_id').persons.sum()
    z['residential_units_zone'] = b.groupby('zone_id').residential_units.sum()
    z['ln_residential_units_zone'] = b.groupby(
        'zone_id').residential_units.sum().apply(np.log1p)
    z['ln_residential_unit_density_zone'] = (
        b.groupby('zone_id').residential_units.sum() / z.acreage).apply(
            np.log1p)
    z['non_residential_sqft_zone'] = b.groupby(
        'zone_id').non_residential_sqft.sum()
    z['ln_non_residential_sqft_zone'] = b.groupby(
        'zone_id').non_residential_sqft.sum().apply(np.log1p)
    z['percent_sf'] = b[b.btype_hlcm == 3].groupby(
        'zone_id').residential_units.sum() * 100.0 / (
            b.groupby('zone_id').residential_units.sum())
    z['avg_unit_price_zone'] = b[(b.residential_units > 0) *
                                 (b.improvement_value > 0)].groupby(
                                     'zone_id').unit_price_residential.mean()
    z['ln_avg_unit_price_zone'] = b[
        (b.residential_units > 0) * (b.improvement_value > 0)].groupby(
            'zone_id').unit_price_residential.mean().apply(np.log1p)
    z['ln_avg_nonres_unit_price_zone'] = b[
        (b.non_residential_sqft > 0) * (b.improvement_value > 0)].groupby(
            'zone_id').unit_price_non_residential.mean().apply(np.log1p)
    z['median_age_of_head'] = hh.groupby('zone_id').age_of_head.median()
    z['mean_income'] = hh.groupby('zone_id').income.mean()
    z['median_year_built'] = b.groupby('zone_id').year_built.median().astype(
        'int32')
    z['ln_avg_land_value_per_sqft_zone'] = p.groupby(
        'zone_id').land_value_per_sqft.mean().apply(np.log1p)
    z['median_yearbuilt_post_1990'] = (
        b.groupby('zone_id').year_built.median() > 1990).astype('int32')
    z['median_yearbuilt_pre_1950'] = (b.groupby('zone_id').year_built.median()
                                      < 1950).astype('int32')
    z['percent_hh_with_child'] = hh[hh.children > 0].groupby(
        'zone_id').size() * 100.0 / z.zonal_hh
    z['percent_renter_hh_in_zone'] = hh[hh.tenure == 2].groupby(
        'zone_id').size() * 100.0 / z.zonal_hh
    z['percent_younghead'] = hh[hh.age_of_head < 30].groupby(
        'zone_id').size() * 100.0 / z.zonal_hh
    z['average_resunit_size'] = b.groupby('zone_id').sqft_per_unit.mean()
    z['zone_contains_park'] = (p[p.lu_type_id == 14].groupby('zone_id').size()
                               > 0).astype('int32')
    z['emp_sector_agg'] = e[e.sector_id == 1].groupby(
        'zone_id').employees.sum()
    z['emp_sector1'] = e[e.sector_id_six == 1].groupby(
        'zone_id').employees.sum()
    z['emp_sector2'] = e[e.sector_id_six == 2].groupby(
        'zone_id').employees.sum()
    z['emp_sector3'] = e[e.sector_id_six == 3].groupby(
        'zone_id').employees.sum()
    z['emp_sector4'] = e[e.sector_id_six == 4].groupby(
        'zone_id').employees.sum()
    z['emp_sector5'] = e[e.sector_id_six == 5].groupby(
        'zone_id').employees.sum()
    z['emp_sector6'] = e[e.sector_id_six == 6].groupby(
        'zone_id').employees.sum()
    z['jobs_within_45min'] = dset.compute_range(z.zonal_emp, 45.0)
    z['ln_jobs_within_45min'] = dset.compute_range(z.zonal_emp,
                                                   45.0).apply(np.log1p)
    z['jobs_within_30min'] = dset.compute_range(z.zonal_emp, 30.0)
    z['ln_jobs_within_30min'] = dset.compute_range(z.zonal_emp,
                                                   30.0).apply(np.log1p)
    z['jobs_within_20min'] = dset.compute_range(z.zonal_emp, 20.0)
    z['jobs_within_15min'] = dset.compute_range(z.zonal_emp, 15.0)
    z['ln_jobs_within_20min'] = dset.compute_range(z.zonal_emp,
                                                   20.0).apply(np.log1p)
    z['ln_pop_within_20min'] = dset.compute_range(z.zonal_pop,
                                                  20.0).apply(np.log1p)
    z['ln_emp_aggsector_within_5min'] = dset.compute_range(
        z.emp_sector_agg, 5.0).apply(np.log1p)
    z['ln_emp_sector1_within_15min'] = dset.compute_range(z.emp_sector1,
                                                          15.0).apply(np.log1p)
    z['ln_emp_sector2_within_15min'] = dset.compute_range(z.emp_sector2,
                                                          15.0).apply(np.log1p)
    z['ln_emp_sector3_within_10min'] = dset.compute_range(z.emp_sector3,
                                                          10.0).apply(np.log1p)
    z['ln_emp_sector3_within_15min'] = dset.compute_range(z.emp_sector3,
                                                          15.0).apply(np.log1p)
    z['ln_emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3,
                                                          20.0).apply(np.log1p)
    z['ln_emp_sector4_within_15min'] = dset.compute_range(z.emp_sector4,
                                                          15.0).apply(np.log1p)
    z['ln_emp_sector5_within_15min'] = dset.compute_range(z.emp_sector5,
                                                          15.0).apply(np.log1p)
    z['ln_emp_sector6_within_15min'] = dset.compute_range(z.emp_sector6,
                                                          15.0).apply(np.log1p)
    # floor the all-purpose agglomeration sum at zero
    z['allpurpose_agglosum_floor'] = (z.allpurpose_agglosum >= 0) * (z.allpurpose_agglosum)

    #Exports (for Tableau-Employment)

    z['emp_sector1_within_20min'] = dset.compute_range(z.emp_sector1, 20.0)
    z['emp_sector2_within_20min'] = dset.compute_range(z.emp_sector2, 20.0)
    z['emp_sector3_within_20min'] = dset.compute_range(z.emp_sector3, 20.0)
    z['emp_sector4_within_20min'] = dset.compute_range(z.emp_sector4, 20.0)
    z['emp_sector5_within_20min'] = dset.compute_range(z.emp_sector5, 20.0)
    z['emp_sector6_within_20min'] = dset.compute_range(z.emp_sector6, 20.0)

    z['emp_sector1_within_30min'] = dset.compute_range(z.emp_sector1, 30.0)
    z['emp_sector2_within_30min'] = dset.compute_range(z.emp_sector2, 30.0)
    z['emp_sector3_within_30min'] = dset.compute_range(z.emp_sector3, 30.0)
    z['emp_sector4_within_30min'] = dset.compute_range(z.emp_sector4, 30.0)
    z['emp_sector5_within_30min'] = dset.compute_range(z.emp_sector5, 30.0)
    z['emp_sector6_within_30min'] = dset.compute_range(z.emp_sector6, 30.0)

    z['emp_sector1_within_45min'] = dset.compute_range(z.emp_sector1, 45.0)
    z['emp_sector2_within_45min'] = dset.compute_range(z.emp_sector2, 45.0)
    z['emp_sector3_within_45min'] = dset.compute_range(z.emp_sector3, 45.0)
    z['emp_sector4_within_45min'] = dset.compute_range(z.emp_sector4, 45.0)
    z['emp_sector5_within_45min'] = dset.compute_range(z.emp_sector5, 45.0)
    z['emp_sector6_within_45min'] = dset.compute_range(z.emp_sector6, 45.0)

    z['residential_unit_per_jobs_within_15_min'] = z[
        'residential_units_zone'] / z['jobs_within_15min']
    z['residential_sqft_per_jobs_within_15_min'] = (b[np.in1d(
        b['building_type_id'],
        [2, 3, 20, 24
         ])].groupby('zone_id').bldg_sq_ft.sum()) / z['jobs_within_15min']

    ztableau = z[[
        'zonal_emp', 'emp_sector1', 'emp_sector2', 'emp_sector3',
        'emp_sector4', 'emp_sector5', 'emp_sector6', 'jobs_within_45min',
        'jobs_within_30min', 'jobs_within_20min', 'emp_sector1_within_20min',
        'emp_sector2_within_20min', 'emp_sector3_within_20min',
        'emp_sector4_within_20min', 'emp_sector5_within_20min',
        'emp_sector6_within_20min', 'emp_sector1_within_30min',
        'emp_sector2_within_30min', 'emp_sector3_within_30min',
        'emp_sector4_within_30min', 'emp_sector5_within_30min',
        'emp_sector6_within_30min', 'emp_sector1_within_45min',
        'emp_sector2_within_45min', 'emp_sector3_within_45min',
        'emp_sector4_within_45min', 'emp_sector5_within_45min',
        'emp_sector6_within_45min', 'residential_unit_per_jobs_within_15_min',
        'residential_sqft_per_jobs_within_15_min'
    ]]
    ztableau.to_csv('C:\urbansim\output\emp_tableau.csv')

    ##JOINS
    #merge parcels with zones
    pz = pd.merge(p.reset_index(), z, left_on='zone_id', right_index=True)
    pz = pz.set_index('parcel_id')
    #merge buildings with parcels/zones
    del b['county_id']
    del b['zone_id']
    bpz = pd.merge(b, pz, left_on='parcel_id', right_index=True)
    bpz['residential_units_capacity'] = bpz.parcel_sqft / 1500 - bpz.residential_units  # capacity at one unit per 1,500 parcel sqft
    bpz.loc[bpz.residential_units_capacity < 0,
            "residential_units_capacity"] = 0  # corrected chained index error
    dset.d['buildings'] = bpz
    if dset.parcels.index.name != 'parcel_id':
        dset.parcels = pu

    dset.d['zones'] = zu