示例#1
0
def parcels_geography(parcels):
    """Load the parcel geography table and attach jurisdiction names.

    Reads ``02_01_2016_parcels_geography.csv`` (indexed by ``geom_id``) from
    ``misc.data_dir()``, passes it through ``geom_id_to_parcel_id`` and adds
    a ``juris_name`` column looked up from ``census_id_to_name.csv``.  The
    ``pda_id`` column is lower-cased and the value "dan1" is blanked out.

    Raises:
        AssertionError: if any ``juris_name`` is still null once the
            hard-coded overrides below have been applied.
    """
    geography = pd.read_csv(
        os.path.join(misc.data_dir(), "02_01_2016_parcels_geography.csv"),
        index_col="geom_id")
    geography = geom_id_to_parcel_id(geography, parcels)

    # lookup used to map a juris id to its name
    names_by_census_id = pd.read_csv(
        os.path.join(misc.data_dir(), "census_id_to_name.csv"),
        index_col="census_id")["name10"]
    geography["juris_name"] = \
        geography.jurisdiction_id.map(names_by_census_id)

    # rows whose jurisdiction name is assigned by hand
    manual_names = (
        (2054504, "Marin County"),
        (2054505, "Santa Clara County"),
        (2054506, "Marin County"),
        (572927, "Contra Costa County"),
    )
    for row_label, name in manual_names:
        geography.loc[row_label, "juris_name"] = name

    # assert no empty juris values
    assert not geography.juris_name.isnull().any()

    # danville ("dan1") wasn't supposed to be a pda
    geography["pda_id"] = \
        geography.pda_id.str.lower().replace("dan1", np.nan)

    return geography
def get_dev_projects_table(scenario, parcels):
    """Load development projects whose geom_id matches a known parcel.

    Reads ``development_projects.csv`` from ``DATA_DIR`` and runs it through
    ``reprocess_dev_projects``.  Rows are then dropped when they are
    excluded by the scenario column, have no ``geom_id``, or have a
    ``geom_id`` that is not present in ``parcels.geom_id`` (the number of
    rows dropped for that last reason is printed).

    Args:
        scenario: name of a 1/0 column in the CSV; only used as a filter
            when a column with that name exists.
        parcels: object exposing a ``geom_id`` attribute usable with
            ``Series.isin``; also handed to ``geom_id_to_parcel_id``.

    Returns:
        The result of ``geom_id_to_parcel_id`` after ``reset_index()``, with
        the original ``geom_id`` values restored as a column.
    """
    df = pd.read_csv(os.path.join(DATA_DIR, "development_projects.csv"))
    df = reprocess_dev_projects(df)

    # this filters project by scenario
    if scenario in df:
        # df[scenario] is 1s and 0s indicating whether to include it
        df = df[df[scenario].astype('bool')]

    df = df.dropna(subset=['geom_id'])

    df["geom_id"] = df.geom_id.astype(float)

    # compute the membership mask once and reuse it for both the report
    # and the filter
    known = df.geom_id.isin(parcels.geom_id)
    n_missing = int((~known).sum())
    if n_missing:
        # parenthesised single-argument form works on Python 2 and 3
        print("%d MISSING GEOMIDS!" % n_missing)

    df = df[known]

    geom_id = df.geom_id  # save for later
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    return df
示例#3
0
def parcels_geography(parcels):
    """Return parcel geography data with a ``juris_name`` column added.

    The geography CSV (``02_01_2016_parcels_geography.csv``, indexed by
    ``geom_id``) is read from ``misc.data_dir()`` and passed through
    ``geom_id_to_parcel_id``.  Jurisdiction names come from
    ``census_id_to_name.csv``; four rows get their name set by hand.
    ``pda_id`` is lower-cased and "dan1" is replaced with NaN.

    Raises:
        AssertionError: if a null ``juris_name`` remains.
    """
    geo_path = os.path.join(misc.data_dir(),
                            "02_01_2016_parcels_geography.csv")
    parcel_geo = geom_id_to_parcel_id(
        pd.read_csv(geo_path, index_col="geom_id"), parcels)

    # this will be used to map juris id to name
    names_path = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    id_to_name = pd.read_csv(names_path, index_col="census_id")["name10"]

    parcel_geo["juris_name"] = parcel_geo.jurisdiction_id.map(id_to_name)

    # hand-assigned names, applied in this order
    for label, county in [(2054504, "Marin County"),
                          (2054505, "Santa Clara County"),
                          (2054506, "Marin County"),
                          (572927, "Contra Costa County")]:
        parcel_geo.loc[label, "juris_name"] = county

    # assert no empty juris values
    has_missing_name = parcel_geo.juris_name.isnull().any()
    assert not has_missing_name

    lowered = parcel_geo.pda_id.str.lower()
    # danville wasn't supposed to be a pda
    parcel_geo["pda_id"] = lowered.replace("dan1", np.nan)

    return parcel_geo
def parcels_geography(parcels):
    """Load the 2017 parcel geography table and attach jurisdiction names.

    Reads ``01_01_2017_parcels_geography.csv`` (indexed by ``geom_id``) from
    ``DATA_DIR``, passes it through ``geom_id_to_parcel_id`` and adds a
    ``juris_name`` column looked up from ``census_id_to_name.csv``.  Row 1
    is given the name "Edmonton" by hand.  The county overrides and the
    "dan1" pda removal found in other variants of this function are
    disabled here.

    Raises:
        AssertionError: if a null ``juris_name`` remains.
    """
    geography_csv = os.path.join(DATA_DIR,
                                 "01_01_2017_parcels_geography.csv")
    geography = pd.read_csv(geography_csv, index_col="geom_id")
    geography = geom_id_to_parcel_id(geography, parcels)

    # this will be used to map juris id to name
    names_csv = os.path.join(DATA_DIR, "census_id_to_name.csv")
    names_by_census_id = pd.read_csv(names_csv,
                                     index_col="census_id")["name10"]

    geography["juris_name"] = \
        geography.jurisdiction_id.map(names_by_census_id)

    geography.loc[1, "juris_name"] = "Edmonton"

    # assert no empty juris values
    assert not geography.juris_name.isnull().any()

    # NOTE(review): astype(str) turns missing pda ids into the literal
    # string "nan" rather than leaving them null -- confirm that callers
    # expect this.
    pda_as_text = geography.pda_id.astype(str)
    geography["pda_id"] = pda_as_text.str.lower()

    return geography
示例#5
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return per-parcel zoning joined with the zoning lookup attributes.

    ``2015_12_21_zoning_parcels.csv`` (indexed by ``geom_id``) is read from
    ``misc.data_dir()``, merged with ``zoning_lookup.to_frame()`` on
    ``zoning_id`` and passed through ``geom_id_to_parcel_id``.

    ``settings`` is accepted but not used by this function.
    """
    csv_path = os.path.join(misc.data_dir(),
                            "2015_12_21_zoning_parcels.csv")
    parcel_zoning = pd.read_csv(csv_path, index_col="geom_id")

    # zoning_id on the parcel side matches the lookup table's index
    lookup = zoning_lookup.to_frame()
    joined = parcel_zoning.merge(lookup,
                                 left_on="zoning_id",
                                 right_index=True)

    return geom_id_to_parcel_id(joined, parcels)
示例#6
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Build the baseline zoning table.

    Reads ``2015_12_21_zoning_parcels.csv`` from ``misc.data_dir()`` with
    ``geom_id`` as the index, joins each row to ``zoning_lookup`` through
    its ``zoning_id`` and hands the result to ``geom_id_to_parcel_id``.

    The ``settings`` argument is unused here.
    """
    zoning_by_geom = pd.read_csv(
        os.path.join(misc.data_dir(), "2015_12_21_zoning_parcels.csv"),
        index_col="geom_id")

    # the lookup is keyed by zoning id, so join on its index
    with_lookup = zoning_by_geom.merge(
        zoning_lookup.to_frame(), left_on="zoning_id", right_index=True)

    result = geom_id_to_parcel_id(with_lookup, parcels)
    return result
def zoning_baseline(parcels, zoning_lookup, settings):
    """Build the baseline zoning table from the 2017 zoning shapefile.

    Loads ``2017_01_01_zoning_parcels.shp`` from ``DATA_DIR`` as a
    GeoDataFrame indexed by ``geom_id``, merges it with
    ``zoning_lookup.to_frame()`` on ``zoning_id`` and passes the result
    through ``geom_id_to_parcel_id``.

    The ``settings`` argument is unused here.
    """
    shapefile = os.path.join(DATA_DIR, "2017_01_01_zoning_parcels.shp")
    zoning_shapes = gp.GeoDataFrame.from_file(shapefile)
    zoning_shapes = zoning_shapes.set_index("geom_id")

    # the lookup is keyed by zoning id, so join on its index
    lookup_frame = zoning_lookup.to_frame()
    merged = pd.merge(zoning_shapes, lookup_frame,
                      left_on="zoning_id", right_index=True)

    return geom_id_to_parcel_id(merged, parcels)
示例#8
0
def development_projects(parcels, settings, scenario):
    """Build the table of development projects to add for a scenario.

    Reads ``development_projects.csv`` from ``misc.data_dir()``, runs it
    through ``reprocess_dev_projects`` and keeps only "add"/"build" rows
    that are switched on in the ``scen<scenario>`` column.  Price and sqft
    fields are filled with defaults, building types are mapped to ids via
    ``settings["building_type_map2"]``, and the table is passed through
    ``geom_id_to_parcel_id``.  A ``describe()`` summary of the buildings
    table's local columns is printed before returning.

    Args:
        parcels: handed to ``geom_id_to_parcel_id``.
        settings: mapping that provides ``"building_type_map2"``.
        scenario: value interpolated into the ``"scen%s"`` column name.

    Returns:
        DataFrame of projects with ``geom_id`` kept as a column.

    Raises:
        KeyError: from the column lookup if the ``scen<scenario>`` column
            is missing; the filter is deliberately mandatory.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    df = df[df.action.isin(["add", "build"])]

    # this filters project by scenario
    colname = "scen%s" % scenario
    # df[colname] is 1s and 0s indicating whether to include it
    # this used to be an optional filter but now I'm going to require it so
    # that we don't accidentally include all the development projects since
    # we've started using scenario-based dev projects pretty extensively
    df = df[df[colname].astype('bool')]

    # need a geom_id to link to parcel_id (dropped once; nothing below
    # can introduce new missing geom_ids)
    df = df.dropna(subset=['geom_id'])

    for fld in ('residential_sqft', 'residential_price',
                'non_residential_price'):
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)

    # HP, GV and SC are all treated as OF
    df["building_type"] = df.building_type.replace(["HP", "GV", "SC"], "OF")
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.fillna(0).astype("int")
    geom_id = df.geom_id
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    df["deed_restricted_units"] = 0

    # parenthesised single-argument form works on Python 2 and 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
示例#9
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Build the baseline zoning table with ``type<N>`` column names.

    Reads ``2015_12_21_zoning_parcels.csv`` (indexed by ``geom_id``) from
    ``misc.data_dir()``, merges it with ``zoning_lookup.to_frame()`` on
    ``zoning_id`` and passes it through ``geom_id_to_parcel_id``.  Any
    column whose name is a key of ``settings["building_type_map2"]`` is
    renamed to ``"type%d"`` of the mapped value; other columns keep their
    names.
    """
    zoning_csv = os.path.join(misc.data_dir(),
                              "2015_12_21_zoning_parcels.csv")
    zoning = pd.read_csv(zoning_csv, index_col="geom_id")
    zoning = zoning.merge(zoning_lookup.to_frame(),
                          left_on="zoning_id", right_index=True)
    zoning = geom_id_to_parcel_id(zoning, parcels)

    # building type name -> "type<id>" column label
    type_labels = {}
    for type_name, type_id in settings["building_type_map2"].items():
        type_labels[type_name] = "type%d" % type_id

    zoning.columns = [type_labels.get(col, col) for col in zoning.columns]

    return zoning
示例#10
0
def zoning_baseline(parcels, zoning_lookup, settings):
    """Return per-parcel zoning with building-type columns relabelled.

    The zoning CSV ``2015_12_21_zoning_parcels.csv`` is read from
    ``misc.data_dir()`` with ``geom_id`` as the index, joined to
    ``zoning_lookup`` via ``zoning_id`` and run through
    ``geom_id_to_parcel_id``.  Columns named after a key of
    ``settings["building_type_map2"]`` are relabelled ``"type%d"`` using
    the mapped value.
    """
    parcel_zoning = pd.read_csv(
        os.path.join(misc.data_dir(), "2015_12_21_zoning_parcels.csv"),
        index_col="geom_id")
    lookup_frame = zoning_lookup.to_frame()
    parcel_zoning = parcel_zoning.merge(
        lookup_frame, left_on="zoning_id", right_index=True)
    parcel_zoning = geom_id_to_parcel_id(parcel_zoning, parcels)

    building_type_ids = settings["building_type_map2"]
    relabel = {name: "type%d" % type_id
               for name, type_id in building_type_ids.items()}

    # columns without an entry in the map keep their current name
    new_names = []
    for column in parcel_zoning.columns:
        new_names.append(relabel.get(column, column))
    parcel_zoning.columns = new_names

    return parcel_zoning
示例#11
0
def development_projects(parcels, settings, scenario):
    """Return the development projects that apply to ``scenario``.

    The projects CSV (``development_projects.csv`` under
    ``misc.data_dir()``) is passed through ``reprocess_dev_projects`` and
    reduced to "add"/"build" rows that are enabled in the
    ``scen<scenario>`` column.  Default values are filled in for price and
    sqft fields, building types are translated with
    ``settings["building_type_map2"]``, and the rows are passed through
    ``geom_id_to_parcel_id``.  A ``describe()`` of the buildings table's
    local columns is printed before the frame is returned.

    Args:
        parcels: handed to ``geom_id_to_parcel_id``.
        settings: mapping that provides ``"building_type_map2"``.
        scenario: value interpolated into the ``"scen%s"`` column name.

    Returns:
        DataFrame of projects with ``geom_id`` kept as a column.

    Raises:
        KeyError: from the column lookup if the ``scen<scenario>`` column
            is missing; the filter is deliberately mandatory.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    df = df[df.action.isin(["add", "build"])]

    # this filters project by scenario
    # df[colname] is 1s and 0s indicating whether to include it
    # this used to be an optional filter but now I'm going to require it so
    # that we don't accidentally include all the development projects since
    # we've started using scenario-based dev projects pretty extensively
    colname = "scen%s" % scenario
    df = df[df[colname].astype('bool')]

    # need a geom_id to link to parcel_id; one drop is enough because no
    # later step touches the geom_id values before they are cast to int
    df = df.dropna(subset=['geom_id'])

    for fld in ('residential_sqft', 'residential_price',
                'non_residential_price'):
        df[fld] = 0
    df["redfin_sale_year"] = 2012  # hedonic doesn't tolerate nans
    df["stories"] = df.stories.fillna(1)
    df["building_sqft"] = df.building_sqft.fillna(0)
    df["non_residential_sqft"] = df.non_residential_sqft.fillna(0)

    # fold the HP, GV and SC codes into OF in a single pass
    df["building_type"] = df.building_type.replace(["HP", "GV", "SC"], "OF")
    df["building_type_id"] = \
        df.building_type.map(settings["building_type_map2"])

    df = df.dropna(subset=["year_built"])  # need a year built to get built

    df["geom_id"] = df.geom_id.astype("int")
    df = df.query('residential_units != "rent"')
    df["residential_units"] = df.residential_units.fillna(0).astype("int")
    geom_id = df.geom_id
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    # we don't predict prices for schools and hotels right now
    df = df.query("building_type_id <= 4 or building_type_id >= 7")

    df["deed_restricted_units"] = 0

    # print(...) with one argument behaves the same on Python 2 and 3
    print("Describe of development projects")
    print(df[orca.get_table('buildings').local_columns].describe())

    return df
示例#12
0
def parcels_geography(parcels):
    """Load the parcel geography table and attach jurisdiction names.

    Reads ``02_01_2016_parcels_geography.csv`` from ``misc.data_dir()``
    (indexed by ``geom_id``, with the ``jurisdiction`` column read as
    ``str``), passes it through ``geom_id_to_parcel_id``, adds a
    ``juris_name`` column looked up from ``census_id_to_name.csv`` and
    lower-cases ``pda_id``.
    """
    geography_csv = os.path.join(misc.data_dir(),
                                 "02_01_2016_parcels_geography.csv")
    geography = pd.read_csv(geography_csv,
                            index_col="geom_id",
                            dtype={'jurisdiction': 'str'})
    geography = geom_id_to_parcel_id(geography, parcels)

    # census id -> jurisdiction name
    names_csv = os.path.join(misc.data_dir(), "census_id_to_name.csv")
    name_lookup = pd.read_csv(names_csv, index_col="census_id")["name10"]

    geography["juris_name"] = geography.jurisdiction_id.map(name_lookup)
    geography["pda_id"] = geography.pda_id.str.lower()

    return geography
示例#13
0
def parcels_geography(parcels):
    """Return parcel geography data with a ``juris_name`` column added.

    The geography CSV ``02_01_2016_parcels_geography.csv`` is read from
    ``misc.data_dir()`` with ``geom_id`` as the index and the
    ``jurisdiction`` column forced to ``str``, then passed through
    ``geom_id_to_parcel_id``.  ``jurisdiction_id`` is mapped to a name via
    the ``name10`` column of ``census_id_to_name.csv``, and ``pda_id`` is
    lower-cased.
    """
    raw = pd.read_csv(
        os.path.join(misc.data_dir(), "02_01_2016_parcels_geography.csv"),
        index_col="geom_id",
        dtype={'jurisdiction': 'str'})
    parcel_geo = geom_id_to_parcel_id(raw, parcels)

    census_names = pd.read_csv(
        os.path.join(misc.data_dir(), "census_id_to_name.csv"),
        index_col="census_id")
    # Series of names indexed by census id
    id_to_name = census_names["name10"]

    parcel_geo["juris_name"] = parcel_geo.jurisdiction_id.map(id_to_name)

    lowered_pda = parcel_geo.pda_id.str.lower()
    parcel_geo["pda_id"] = lowered_pda

    return parcel_geo
示例#14
0
def demolish_events(parcels, settings, scenario):
    """Return the "demolish" and "build" rows of the development projects.

    Reads ``development_projects.csv`` from ``misc.data_dir()`` and runs it
    through ``reprocess_dev_projects``.  When ``scenario`` names a column,
    that column is used as an include flag.  Rows without a ``geom_id`` are
    dropped and the rest are passed through ``geom_id_to_parcel_id``.

    ``settings`` is accepted but not used by this function.
    """
    csv_path = os.path.join(misc.data_dir(), "development_projects.csv")
    events = reprocess_dev_projects(pd.read_csv(csv_path))

    # this filters project by scenario
    if scenario in events:
        # the scenario column is 1s and 0s indicating whether to include it
        include = events[scenario].astype('bool')
        events = events[include]

    # keep demolish and build records
    wanted_actions = ["demolish", "build"]
    events = events[events.action.isin(wanted_actions)]

    events = events.dropna(subset=['geom_id']).set_index("geom_id")

    # use parcel id
    return geom_id_to_parcel_id(events, parcels).reset_index()
示例#15
0
def demolish_events(parcels, settings, scenario):
    """Load demolish/build events from the development projects file.

    ``development_projects.csv`` (under ``misc.data_dir()``) is read and
    passed to ``reprocess_dev_projects``.  If the frame has a column named
    ``scenario`` it is applied as a boolean include filter.  Only rows
    whose ``action`` is "demolish" or "build" and that have a ``geom_id``
    are kept; these are passed through ``geom_id_to_parcel_id``.

    The ``settings`` argument is unused here.
    """
    projects = pd.read_csv(
        os.path.join(misc.data_dir(), "development_projects.csv"))
    projects = reprocess_dev_projects(projects)

    # this filters project by scenario
    has_scenario_column = scenario in projects
    if has_scenario_column:
        # projects[scenario] is 1s and 0s indicating whether to include it
        projects = projects[projects[scenario].astype('bool')]

    # keep demolish and build records
    is_demolish_or_build = projects.action.isin(["demolish", "build"])
    projects = projects[is_demolish_or_build]

    with_geom = projects.dropna(subset=['geom_id'])
    keyed = with_geom.set_index("geom_id")
    result = geom_id_to_parcel_id(keyed, parcels).reset_index()  # parcel id

    return result
示例#16
0
def get_dev_projects_table(scenario, parcels):
    """Load development projects whose geom_id matches a known parcel.

    Reads ``development_projects.csv`` from ``misc.data_dir()`` and runs it
    through ``reprocess_dev_projects``.  Rows are then dropped when they
    are excluded by the scenario column, have no ``geom_id``, or have a
    ``geom_id`` that is not present in ``parcels.geom_id`` (the number of
    rows dropped for that last reason is printed).

    Args:
        scenario: name of a 1/0 column in the CSV; only used as a filter
            when a column with that name exists.
        parcels: object exposing a ``geom_id`` attribute usable with
            ``Series.isin``; also handed to ``geom_id_to_parcel_id``.

    Returns:
        The result of ``geom_id_to_parcel_id`` after ``reset_index()``, with
        the original ``geom_id`` values restored as a column.
    """
    df = pd.read_csv(os.path.join(misc.data_dir(), "development_projects.csv"))
    df = reprocess_dev_projects(df)

    # this filters project by scenario
    if scenario in df:
        # df[scenario] is 1s and 0s indicating whether to include it
        df = df[df[scenario].astype('bool')]

    df = df.dropna(subset=['geom_id'])

    # one membership mask serves both the report and the filter
    in_parcels = df.geom_id.isin(parcels.geom_id)
    missing_count = int((~in_parcels).sum())
    if missing_count:
        # parenthesised single-argument form works on Python 2 and 3
        print("%d MISSING GEOMIDS!" % missing_count)

    df = df[in_parcels]

    geom_id = df.geom_id  # save for later
    df = df.set_index("geom_id")
    df = geom_id_to_parcel_id(df, parcels).reset_index()  # use parcel id
    df["geom_id"] = geom_id.values  # add it back again cause it goes away

    return df
示例#17
0
def static_parcels(settings, parcels):
    """Return the ids of the parcels listed in ``settings["static_parcels"]``.

    The configured geom_ids become the index of an empty DataFrame, which
    is passed through ``geom_id_to_parcel_id``; the index values of the
    result are returned.
    """
    # list of geom_ids to not relocate
    geom_ids = settings["static_parcels"]
    placeholder = pd.DataFrame(index=geom_ids)
    # geom_ids -> parcel_ids
    converted = geom_id_to_parcel_id(placeholder, parcels)
    return converted.index.values
示例#18
0
def static_parcels(settings, parcels):
    """Translate the configured static geom_ids via geom_id_to_parcel_id.

    Reads the list under ``settings["static_parcels"]``, wraps it as the
    index of a column-less DataFrame and returns the index values of what
    ``geom_id_to_parcel_id`` gives back for that frame.
    """
    # geom_ids of parcels that should not be relocated
    index_only = pd.DataFrame(index=settings["static_parcels"])
    # geom_ids -> parcel_ids
    by_parcel = geom_id_to_parcel_id(index_only, parcels)
    parcel_ids = by_parcel.index.values
    return parcel_ids