import os

import pytest

from spandex import TableLoader
from spandex.spatialtoolz import conform_srids


# Assumed pytest fixture: the function takes `request` and registers a
# finalizer, which is the pytest fixture pattern.
@pytest.fixture
def loader(request):
    """Recreate sample schema from shapefiles and tear down when done."""
    # Configure TableLoader to use directory containing sample shapefiles.
    root_path = os.path.dirname(__file__)
    data_path = os.path.join(root_path, '../../test_data')
    loader = TableLoader(directory=data_path)

    # Recreate PostgreSQL sample schema.
    with loader.database.cursor() as cur:
        cur.execute("""
            CREATE EXTENSION IF NOT EXISTS postgis;
            DROP SCHEMA IF EXISTS sample CASCADE;
            CREATE SCHEMA sample;
        """)
    loader.database.refresh()

    # Load all shapefiles in test data directory.
    for filename in os.listdir(data_path):
        file_root, file_ext = os.path.splitext(filename)
        if file_ext.lower() == '.shp':
            shp_path = os.path.join(data_path, filename)
            table_name = 'sample.' + file_root
            loader.load_shp(shp_path, table_name)

    # Reproject all non-conforming SRIDs into project SRID.
    conform_srids(loader.srid, schema=loader.tables.sample)

    # Tear down sample schema when done.
    def teardown():
        with loader.database.cursor() as cur:
            cur.execute("DROP SCHEMA sample CASCADE;")
    request.addfinalizer(teardown)

    return loader
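# A minimal sketch (an assumption, not from the source) of a test that
# consumes the fixture above: pytest injects `loader`, and the finalizer
# drops the sample schema after the test finishes.
def test_sample_schema_exists(loader):
    with loader.database.cursor() as cur:
        cur.execute("SELECT count(*) FROM information_schema.schemata "
                    "WHERE schema_name = 'sample';")
        assert cur.fetchone()[0] == 1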
    'built/parcel/2010/son/PAR_PARCELS.shp',

    # Geometry type is MultiPolygonZM.
    #'staging.parcels_son_exlu':
    #'built/parcel/2010/son/parcels2010_son/Final2010exlu.shp',

    'staging.taz':
    'juris/reg/zones/taz1454.shp',

    'staging.establishment_points':
    'emp/micro/est10_gt1/est10_esri_gt1.shp',
}

# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)

# Fix invalid geometries and reproject.
staging = loader.tables.staging
conform_srids(loader.srid, schema=staging, fix=True)

# Load county land use code mapping.
import pandas as pd, numpy as np
import pandas.io.sql as sql
from spandex import TableLoader
from spandex.io import exec_sql, df_to_db

# Connect to the database
loader = TableLoader()

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

def unit_choice(chooser_ids, alternative_ids, probabilities):
    """
    Have a set of choosers choose from among alternatives according
    to a probability distribution. Choice is binary: each
    alternative can only be chosen once.

    Parameters
    ----------
    chooser_ids : 1d array_like
        Array of IDs of the agents that are making choices.
    alternative_ids : 1d array_like
        Array of IDs of alternatives among which agents
        are making choices.
    probabilities : 1d array_like
        The probability that an agent will choose an alternative.
        Must be the same shape as `alternative_ids`. Unavailable
        alternatives should have a probability of 0.

    Returns
    -------
    choices : pandas.Series
        Mapping of chooser ID to alternative ID; some choosers map to
        a NaN value when there are not enough alternatives for all of
        the choosers.

    """
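# A minimal sketch of an implementation consistent with the docstring above
# (an assumption, not the source's code; the name _unit_choice_sketch is
# hypothetical): sample alternatives without replacement, weighted by
# probability, and leave excess choosers unmatched (NaN). Assumes chooser
# IDs are unique.
def _unit_choice_sketch(chooser_ids, alternative_ids, probabilities):
    chooser_ids = np.asanyarray(chooser_ids)
    alternative_ids = np.asanyarray(alternative_ids)
    probabilities = np.asanyarray(probabilities, dtype=float)

    choices = pd.Series(index=chooser_ids)

    if probabilities.sum() == 0:
        # No alternatives are available to be chosen.
        return choices

    # Cannot choose more units than are available (nonzero probability).
    n_to_choose = min(len(chooser_ids), np.count_nonzero(probabilities))

    chosen = np.random.choice(
        alternative_ids, size=n_to_choose, replace=False,
        p=probabilities / probabilities.sum())

    if n_to_choose == len(chooser_ids):
        choices[:] = chosen
    else:
        # More choosers than units: randomly pick which choosers match.
        lucky = np.random.choice(chooser_ids, size=n_to_choose, replace=False)
        choices.loc[lucky] = chosen

    return choices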
import pandas as pd, numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql, df_to_db
from spandex import TableLoader

loader = TableLoader()

# Read Redfin CSV and load to database
redfin_csv_path = loader.get_path('built/bldg/homeprices/redfin_03feb14.csv')
redfin = pd.read_csv(redfin_csv_path)
redfin.index.name = 'idx'
df_to_db(redfin, 'redfin', schema=loader.tables.staging)

# Lat/long to point geometry, with the right SRID
exec_sql("ALTER TABLE staging.redfin ADD COLUMN geom geometry;")
exec_sql("UPDATE staging.redfin SET geom = ST_GeomFromText('POINT(' || longitude || ' ' || latitude || ')', 4326);")
exec_sql("CREATE INDEX redfin_gidx ON staging.redfin USING gist (geom);")
exec_sql("SELECT UpdateGeometrySRID('staging', 'redfin', 'geom', 2768);")
exec_sql("UPDATE staging.redfin SET geom = ST_Transform(ST_SetSRID(geom, 4326), 2768);")

# Append the unique parcel identifier to the Redfin records
exec_sql("ALTER TABLE staging.redfin ADD gid integer DEFAULT 0;")
exec_sql("UPDATE staging.redfin SET gid = a.gid FROM parcels a WHERE ST_Within(staging.redfin.geom, a.geom);")

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)
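# Hedged usage sketch (an assumption, not from the source): pull the Redfin
# records that were matched to a parcel back into a DataFrame with the
# helper defined above.
redfin_matched = db_to_df("SELECT * FROM staging.redfin WHERE gid > 0;")
print 'Matched %s of %s Redfin records to parcels.' % (
    len(redfin_matched), len(redfin))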
import os
import re
import zipfile
import urllib, urllib2

import pandas as pd, numpy as np
from bs4 import BeautifulSoup
from spandex import TableLoader

loader = TableLoader()

# Scrape the Census directory listing for ACS 2013 5-year PUMS zip files.
soup = BeautifulSoup(urllib2.urlopen("http://www2.census.gov/acs2013_5yr/pums/"))

# Household records (csv_hXX.zip) and person records (csv_pXX.zip).
tags = soup.find_all(href=re.compile("csv_h..\.zip"))
hpums_links = []
for t in tags:
    hpums_links.append(t['href'])

tags = soup.find_all(href=re.compile("csv_p..\.zip"))
ppums_links = []
for t in tags:
    ppums_links.append(t['href'])

pums_links = hpums_links + ppums_links

# Download each PUMS zip file to the project data directory.
for pums_file in pums_links:
    print pums_file
    pums_file_dl = urllib.URLopener()
    pums_file_dl.retrieve(
        "http://www2.census.gov/acs2013_5yr/pums/%s" % pums_file,
        os.path.join(loader.get_path('pums'), pums_file))

for pums_file in pums_links:
    filepath = os.path.join(loader.get_path('pums'), pums_file)
import pandas as pd, numpy as np
import pandas.io.sql as sql
from spandex import TableLoader

loader = TableLoader()

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

# Read from database (public schema)
parcels = db_to_df('select * from parcels').set_index('parcel_id')
jobs = db_to_df('select * from jobs').set_index('job_id')
households = db_to_df('select * from households').set_index('household_id')
buildings = db_to_df('select * from buildings').set_index('building_id')
hh_controls = db_to_df('select year, activity_id, luz_id, total_number_of_households '
                       'from annual_household_control_totals')
pecas_prices = db_to_df('select year, luz_id, development_type_id, price '
                        'from pecas_prices')
assessor_transactions = db_to_df('select * from assessor_transactions').set_index('building_id')
zoning = db_to_df('select * from zoning')
zoning_allowed_uses = db_to_df('select zoning_id, development_type_id '
                               'from zoning_allowed_uses')
fee_schedule = db_to_df('select fee_schedule_id, development_type_id, '
                        'development_fee_per_unit_space_initial from staging.fee_schedule')
parcel_fee_schedule = db_to_df('select parcelid as parcel_id, fee_schedule_id, portion '
                               'from staging.parcel_fee_schedule')

# Remove unnecessary id columns appended by spandex
for df in [buildings, jobs, households, assessor_transactions, zoning]:
    if 'id' in df.columns:
        del df['id']

zoning = zoning.set_index('zoning_id')
import os
import urllib
import zipfile

from spandex import TableLoader
from spandex.io import exec_sql
from spandex.spatialtoolz import conform_srids, tag
import pandas as pd, numpy as np
import pandas.io.sql as sql

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

loader = TableLoader()

# Download PUMA 2000 geometry zip files.
for i in range(73):
    # Zero-pad the index so names run p500_d00_shp.zip ... p572_d00_shp.zip.
    if i < 10:
        filename = 'p50%s_d00_shp.zip' % i
    else:
        filename = 'p5%s_d00_shp.zip' % i
    try:
        pumageom_file = urllib.URLopener()
        pumageom_file.retrieve(
            "http://www2.census.gov/geo/tiger/PREVGENZ/pu/p500shp/%s" % filename,
            os.path.join(loader.get_path('puma_geom'), filename))
        print 'Downloading %s' % filename
    except:
        # Not every index exists on the server; skip missing files.
        continue
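# The zipfile import above suggests the downloaded archives are extracted
# next; a minimal sketch of that step (an assumption, not the source's code):
for i in range(73):
    filename = 'p50%s_d00_shp.zip' % i if i < 10 else 'p5%s_d00_shp.zip' % i
    filepath = os.path.join(loader.get_path('puma_geom'), filename)
    if os.path.exists(filepath):
        with zipfile.ZipFile(filepath) as z:
            z.extractall(loader.get_path('puma_geom'))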
import pandas as pd, numpy as np
import pandas.io.sql as sql
from pandas.io.excel import read_excel
from spandex.io import exec_sql, df_to_db
from spandex import TableLoader

loader = TableLoader()

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

def csv_to_staging(path, table_name):
    """Loads csv to staging schema on database."""
    df = pd.read_csv(path)
    df.index.name = 'idx'
    print 'Loading %s.' % table_name
    df_to_db(df, table_name, schema=loader.tables.staging)

def field_in_table(field_name, table_name):
    """True if field in table else False."""
    return field_name in db_to_df(
        "SELECT column_name FROM information_schema.columns "
        "WHERE table_name='%s'" % table_name).column_name.values

def lat_long_to_point_geometry(tbl, schema, x_col='longitude', y_col='latitude',
                               geom_field_name='geom', target_srid=2768):
    """Creates point geometry on db table based on lat/long fields."""
    print 'Creating point geometry from lat/long in table %s.' % tbl
    if not field_in_table(geom_field_name, tbl):
        exec_sql("ALTER TABLE %s.%s ADD COLUMN %s geometry;"
                 % (schema, tbl, geom_field_name))
    exec_sql("UPDATE %s.%s SET %s = ST_GeomFromText('POINT(' || %s || ' ' || %s || ')', 4326);"
             % (schema, tbl, geom_field_name, x_col, y_col))
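# Hedged usage sketch of the helpers above; the CSV path and table name are
# hypothetical, not from the source.
csv_to_staging(loader.get_path('built/bldg/example_points.csv'), 'example_points')
lat_long_to_point_geometry('example_points', 'staging')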
    'staging.parcels': 'space/parcel.shp',
    'staging.buildings': 'space/building.shp',
    'staging.blocks': 'space/tl_2010_06073_tabblock10.shp',
    'staging.sitespec': 'scheduled/site_spec.shp',
}

# Install PostGIS and create staging schema.
loader = TableLoader()
with loader.database.cursor() as cur:
    cur.execute("""
        CREATE EXTENSION IF NOT EXISTS postgis;
        CREATE SCHEMA IF NOT EXISTS staging;
    """)
loader.database.refresh()
t = loader.tables
staging = loader.tables.staging

# Load shapefiles specified above to the project database.
loader.load_shp_map(shapefiles)
import pandas as pd
from spandex import TableLoader
import pandas.io.sql as sql

loader = TableLoader()

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

# Export to HDF5: get path to the output file.
h5_path = loader.get_path('out/regeneration/summaries/bayarea_v3.h5')

# Buildings
buildings = db_to_df('select * from building').set_index('building_id')
if 'id' in buildings.columns:
    del buildings['id']

# Map development_type_id codes to building_type_id codes.
# (The original repeated the development_type_id == 13 assignment twice;
# the duplicate is dropped here.)
buildings['building_type_id'] = 0
buildings.building_type_id[buildings.development_type_id == 1] = 1
buildings.building_type_id[buildings.development_type_id == 2] = 3
buildings.building_type_id[buildings.development_type_id == 5] = 12
buildings.building_type_id[buildings.development_type_id == 7] = 10
buildings.building_type_id[buildings.development_type_id == 9] = 5
buildings.building_type_id[buildings.development_type_id == 10] = 4
buildings.building_type_id[buildings.development_type_id == 13] = 8
buildings.building_type_id[buildings.development_type_id == 14] = 7
buildings.building_type_id[buildings.development_type_id == 15] = 9
buildings.building_type_id[buildings.development_type_id == 17] = 6
buildings.building_type_id[buildings.development_type_id == 24] = 16
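# The chained assignments above can be expressed more compactly as a code
# mapping; a hedged equivalent sketch (same codes, same default of 0):
dev_to_building_type = {1: 1, 2: 3, 5: 12, 7: 10, 9: 5, 10: 4,
                        13: 8, 14: 7, 15: 9, 17: 6, 24: 16}
buildings['building_type_id'] = (buildings.development_type_id
                                 .map(dev_to_building_type)
                                 .fillna(0).astype(int))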
import numpy as np
import pandas as pd
import cStringIO
import psycopg2
import pandas.io.sql as sql
from spandex import TableLoader

loader = TableLoader()

# Connect to the Urban Canvas database (credentials masked).
conn_string = ("host='urbancanvas.cp2xwchuariu.us-west-2.rds.amazonaws.com' "
               "dbname='sandag_testing' user='******' password='******' port=5432")
conn = psycopg2.connect(conn_string)
cur = conn.cursor()

def uc_db_to_df(query):
    """Executes SQL query against Urban Canvas and returns DataFrame."""
    return sql.read_frame(query, conn)

parcels = uc_db_to_df(
    "select parcel_id, zoning_id, devtype_id as development_type_id from parcel "
    "where projects = '{1}' and valid_from = '{-infinity}';").set_index('parcel_id')
buildings = uc_db_to_df(
    "SELECT building_id, parcel_id, building_type_id as development_type_id, "
    "improvement_value, residential_units, non_residential_sqft, stories, "
    "year_built, residential_sqft, note FROM building "
    "where projects = '{1}' and valid_from = '{-infinity}';").set_index('building_id')

# Put tables in HDF5.
h5_path = loader.get_path('out/sandag.h5')
store = pd.HDFStore(h5_path)
del store['buildings']  # Replace the existing buildings table.
store['buildings'] = buildings

p_prev = store.parcels.copy()
p_prev['zoning_id'] = parcels.zoning_id
p_prev['development_type_id'] = parcels.development_type_id
import os

import pandas as pd
from spandex import TableLoader, TableFrame
from spandex.utils import load_config
from spandex.io import exec_sql, df_to_db
import pandas.io.sql as sql

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

# Build parcels TableFrame.
loader = TableLoader()
table = loader.database.tables.public.parcels
tf = TableFrame(table, index_col='gid')

# Load TAZ residential unit control totals.
taz_controls_csv = loader.get_path('hh/taz2010_imputation.csv')
targetunits = pd.read_csv(taz_controls_csv, index_col='taz1454')['targetunits']

# Get CSV output file directory.
output_dir = loader.get_path('out/regeneration/summaries')

# Generate summary CSV by county and TAZ.
for grouper in ['county_id', 'taz']:
    df = tf[[grouper, 'non_residential_sqft', 'residential_units']]
    df.dropna(subset=[grouper], inplace=True)
    if grouper == 'taz':
        df[grouper] = df[grouper].astype(int)
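    # Hedged completion sketch: the loop body is truncated above, and the
    # groupby-sum, targetunits join, and output filename below are
    # assumptions, not the source's code.
    summary = df.groupby(grouper).sum()
    if grouper == 'taz':
        # Compare built units against TAZ residential unit control totals.
        summary['targetunits'] = targetunits
    summary.to_csv(os.path.join(output_dir, '%s_summary.csv' % grouper))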
# Imports
import pandas as pd, numpy as np
import pandas.io.sql as sql
from spandex import TableLoader
from spandex.io import exec_sql, df_to_db
from spandex.targets import scaling as scl
import hashlib

# Connect to the database
loader = TableLoader()

def db_to_df(query):
    """Executes SQL query and returns DataFrame."""
    conn = loader.database._connection
    return sql.read_frame(query, conn)

# Load parcels as dataframes for the imputation
parcels = db_to_df('select * from parcels;')
parcels = parcels.set_index('gid')

# Standardize the res_type field
parcels.res_type[parcels.res_type.isnull()] = 'other'
parcels.res_type[parcels.res_type == ''] = 'other'
parcels.res_type[np.in1d(parcels.res_type,
                         ['FLATS', 'APTS', 'CONDO', 'SRO', 'LIVEWORK', 'mixed'])] = 'multi'
parcels.res_type[parcels.res_type == 'SINGLE'] = 'single'

# Load TAZ residential unit control totals and other zonal targets.
taz_controls_csv = loader.get_path('hh/taz2010_imputation.csv')
targetunits = pd.read_csv(taz_controls_csv, index_col='taz1454')

taz_controls_csv2 = loader.get_path('hh/tazsumm_redfin.csv')
import os
import string
import subprocess

import numpy as np
import pandas as pd

from spandex import TableLoader, TableFrame
from spandex.io import df_to_db
import urbansim.sim.simulation as sim

import utils

loader = TableLoader()
staging = loader.tables.staging

## Assumptions.

# Use codes were classified manually because the assessor classifications
# are meant for property tax purposes. These classifications should be
# reviewed and revised.
res_codes = {'single': ['01', '51', '52', '53'],
             'multi': [string.zfill(i, 2) for i in
                       range(2, 6) + range(7, 10) + range(89, 99)],
             'mixed': []}
exempt_codes = []

## Register input tables.
tf = TableFrame(staging.parcels_smt, index_col='apn')
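# A hedged sketch (not from the source) of how res_codes might be applied to
# classify an assessor use code; the function name is hypothetical.
def res_type_for_use_code(use_code):
    """Return 'single', 'multi', or 'mixed' for a use code, else 'other'."""
    for res_type, codes in res_codes.items():
        if use_code in codes:
            return res_type
    return 'other'

# e.g. res_type_for_use_code('51') -> 'single'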