def run(self):
    """First-run setup for the employment income model.

    When temp/data.h5 does not exist yet: create a run-log entry, pull the
    income population from the output database and the military population
    from the input database, net military-dependent persons out of the
    household population, bin ages into categories and store the aggregated
    population under 'pop' in temp/data.h5.

    NOTE(review): the flattened source was ambiguous about where the else
    branch ends; only the run-log creation is treated as first-run-only here,
    matching the sibling run() that recomputes population unconditionally —
    confirm against the original layout.
    """
    my_file = Path('temp/data.h5')
    if my_file.is_file():
        # Py2-only `print'File exists'` replaced with the form valid on
        # both Python 2 and 3.
        print('File exists')
    else:
        db_run_id = log.new_run(name='emp_run_log')
        run_id = pd.Series([db_run_id])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    db_connection_string = database.get_connection_string(
        'model_config.yml', 'in_db')
    sql_in_engine = create_engine(db_connection_string)

    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    tables = util.yaml_to_dict('model_config.yml', 'db_tables')

    in_query = getattr(sql, 'inc_pop') % (
        tables['inc_pop_table'], rate_versions['inc_pop'])
    in_query2 = getattr(sql, 'inc_pop_mil') % (
        tables['population_table'], rate_versions['population'])
    pop = pd.read_sql(in_query, engine,
                      index_col=['age', 'race_ethn', 'sex', 'mildep'])
    pop_mil = pd.read_sql(in_query2, sql_in_engine,
                          index_col=['age', 'race_ethn', 'sex', 'mildep'])

    # net military-dependent persons out of the household population
    pop = pop.join(pop_mil)
    pop['persons'] = (pop['persons'] - pop['mil_mildep'])
    pop = pop.reset_index(drop=False)

    # Age categories, data-driven instead of 22 copy-pasted .loc lines.
    # Bounds behave like range(lower, upper): lower inclusive, upper exclusive.
    age_bins = [
        (0, 5, '00_04'), (5, 10, '05_09'), (10, 15, '10_14'),
        (15, 18, '15_17'), (18, 20, '18_19'), (20, 21, '20_20'),
        (21, 22, '21_21'), (22, 25, '22_24'), (25, 30, '25_29'),
        (30, 35, '30_34'), (35, 40, '35_39'), (40, 45, '40_44'),
        (45, 50, '45_49'), (50, 55, '50_54'), (55, 60, '55_59'),
        (60, 62, '60_61'), (62, 65, '62_64'), (65, 67, '65_66'),
        (67, 70, '67_69'), (70, 75, '70_74'), (75, 80, '75_79'),
        (80, 85, '80_84'), (85, 103, '85_99')]
    pop['age_cat'] = ''
    for lower, upper, label in age_bins:
        pop.loc[pop['age'].isin(list(range(lower, upper))),
                ['age_cat']] = label

    pop = pd.DataFrame(pop['persons'].groupby(
        [pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum())
    print(pop.head())
    pop.to_hdf('temp/data.h5', 'pop', mode='a')
def run(self):
    """Aggregate the adult (18+) non-military household population by year
    and age category and store it under 'pop' in temp/data.h5."""
    out_engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    in_engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # latest run id from the output database, persisted for downstream steps
    max_id_query = getattr(sql, 'max_run_id')
    db_run_id = pd.read_sql(max_id_query, out_engine, index_col=None)
    run_id = pd.Series([db_run_id['max'].iloc[0]])
    run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    tables = util.yaml_to_dict('model_config.yml', 'db_tables')

    pop_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], run_id[0])
    mil_query = getattr(sql, 'inc_mil_hh_pop') % (
        tables['population_table'], rate_versions['population'])

    cohort_index = ['age', 'race_ethn', 'sex', 'mildep']
    pop = pd.read_sql(pop_query, out_engine, index_col=cohort_index)
    pop_mil = pd.read_sql(mil_query, in_engine, index_col=cohort_index)

    # net out military-dependent persons, then keep adults only
    pop = pop.join(pop_mil)
    pop['persons'] = (pop['persons'] - pop['mil_mildep'])
    pop = pop.reset_index(drop=False)
    pop = pop[pop['age'] >= 18]

    # age categories: lower bound inclusive, upper bound exclusive
    pop['age_cat'] = ''
    for lower, upper, label in ((18, 25, '18_24'), (25, 35, '25_34'),
                                (35, 45, '35_44'), (45, 55, '45_54'),
                                (55, 60, '55_59'), (60, 65, '60_64'),
                                (65, 75, '65_74'), (75, 103, '75_99')):
        pop.loc[pop['age'].isin(list(range(lower, upper))),
                ['age_cat']] = label

    pop = pd.DataFrame(
        pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum())
    pop.to_hdf('temp/data.h5', 'pop', mode='a')
def run(self):
    """Compute non-wage income totals by year and income type and append
    them to defm.non_wage_income."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))

    pop = pd.read_hdf('temp/data.h5', 'pop')
    shares = extract.create_df('inc_shares', 'inc_shares_table',
                               index=['yr', 'age_cat'])

    # total income per cohort = per-capita income * persons * share
    shares = shares.join(pop)
    shares['totals'] = (shares['income'] * shares['persons'] *
                        shares['share'])
    shares = shares.reset_index(drop=False)
    shares = pd.DataFrame(shares['totals'].groupby(
        [shares['yr'], shares['income_type']]).sum())
    shares = shares.reset_index(drop=False)

    # one column per income type, keyed by year
    shares = pd.pivot_table(shares, values='totals', index=['yr'],
                            columns=['income_type'])
    shares.rename(columns={'intp': 'Interest',
                           'oip': 'Other',
                           'pap': 'Public_Assistance',
                           'retp': 'Retirement',
                           'ssip': 'Supplemental_Social_Security',
                           'ssp': 'Social_Security'}, inplace=True)
    shares = shares[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                     'Supplemental_Social_Security', 'Social_Security']]

    run_id = pd.read_hdf('temp/data.h5', 'run_id')[0]
    shares['run_id'] = run_id
    shares.to_sql(name='non_wage_income', con=engine, schema='defm',
                  if_exists='append', index=True)
def run(self):
    """Append one simulated year's demographic tables and a one-row summary
    to the defm schema."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    run_id = pd.read_hdf('temp/data.h5', 'run_id')[0]

    pop = pd.read_hdf('temp/data.h5', 'pop')
    mig_out = pd.read_hdf('temp/data.h5', 'mig_out')
    mig_in = pd.read_hdf('temp/data.h5', 'mig_in')
    dead_pop = pd.read_hdf('temp/data.h5', 'dead_pop')
    new_born = pd.read_hdf('temp/data.h5', 'new_born')

    # summary row is built before yr/run_id columns are stamped on
    summary_row = {'Year': self.year,
                   'Run_id': run_id,
                   'Population': pop['persons'].sum(),
                   'mig_out': (mig_out['mig_Dout'].sum() +
                               mig_out['mig_Fout'].sum()),
                   'mig_in': (mig_in['mig_Din'].sum() +
                              mig_in['mig_Fin'].sum()),
                   'deaths': dead_pop['deaths'].sum(),
                   'new_born': new_born['new_born'].sum()}

    exports = [('population', pop),
               ('mig_out', mig_out),
               ('mig_in', mig_in),
               ('dead_pop', dead_pop),
               ('new_born', new_born)]
    # stamp the simulated year and run id on every detail table
    for _, frame in exports:
        frame['yr'] = self.year
        frame['run_id'] = run_id
    for table_name, frame in exports:
        frame.to_sql(name=table_name, con=engine, schema='defm',
                     if_exists='append', index=True)

    pd.DataFrame([summary_row]).to_sql(name='population_summary', con=engine,
                                       schema='defm', if_exists='append',
                                       index=False)
def new_run(name='runs', run_id=None, econ_id=0, dem_id=0):
    """Insert a run-log row recording the scenario ids for this model run.

    Creates the defm.<name> table on first use, then inserts one row with
    the economic and demographic scenario ids and returns the new row's
    auto-generated primary key.

    NOTE(review): the run_id parameter is accepted but never read here —
    confirm whether callers rely on passing it.
    """
    Base = declarative_base()
    table_name = name

    # ORM mapping for the run-log table; built dynamically so the table
    # name can vary per caller.
    class Run(Base):
        __tablename__ = table_name
        __table_args__ = {'schema': 'defm'}
        # define columns for the table
        id = Column(Integer, primary_key=True)
        economic_scenario_id = Column(Integer)
        demographic_scenario_id = Column(Integer)

    #metadata = MetaData(schema="defm")
    # route unqualified table names into the defm schema
    engine = create_engine(
        get_connection_string(
            "model_config.yml", 'output_database')).execution_options(schema_translate_map={
                None: "defm",  # no schema name -> "defm"
            })
    Base.metadata.schema = 'defm'
    # create the log table only if it does not exist yet
    if not engine.has_table(table_name, schema='defm'):
        Base.metadata.create_all(engine)
    db_session = sessionmaker(bind=engine)
    session = db_session()
    # Insert versions in database
    model_run = Run(economic_scenario_id=econ_id,
                    demographic_scenario_id=dem_id)
    session.add(model_run)
    session.commit()
    # primary key assigned by the database on commit
    run_id = model_run.id
    return run_id
def run(self):
    """Split total jobs into SANDAG sectors, attach annual pay, and write
    the result to temp/data.h5 ('sectoral') and defm.sectors."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))

    share_rates = extract.create_df('sectoral_share', 'sectoral_share_table',
                                    index=['yr', 'sandag_sector'])
    pay_rates = extract.create_df('sectoral_pay', 'sectoral_pay_table',
                                  index=['yr', 'sandag_sector'])

    jobs = pd.read_hdf('temp/data.h5', 'jobs')
    jobs = jobs[['jobs_total']]
    # right join keeps one row per (yr, sector) share rate
    jobs = jobs.join(share_rates, how='right')
    jobs['sector_jobs'] = (jobs['jobs_total'] * jobs['share']).round()
    jobs = jobs.drop(['jobs_total'], 1)

    jobs = jobs.join(pay_rates)
    jobs['tot_ann_job_pay'] = (jobs['sector_jobs'] *
                               jobs['annual_pay']).round()
    jobs.to_hdf('temp/data.h5', 'sectoral', mode='a')

    run_id = pd.read_hdf('temp/data.h5', 'run_id')[0]
    jobs['run_id'] = run_id
    jobs.to_sql(name='sectors', con=engine, schema='defm',
                if_exists='append', index=True)
def to_database(scenario=' ', rng=range(0, 0),
                urbansim_connection=get_connection_string("configs/dbconfig.yml", 'urbansim_database'),
                default_schema='urbansim_output'):
    """Export urbansim results from data/results.h5 to the output database.

    scenario: scenario name used to look up scenario/parent scenario ids.
    rng: simulation years; should be the simulation period minus the first
        and last year.
    urbansim_connection: sql connection, default is for urbansim_database.
    default_schema: schema name under which to save the data.
    """
    # look up the scenario ids for the run being exported
    conn = psycopg2.connect(database="urbansim", user="******",
                            password="******", host="socioeca8", port="5432")
    try:
        cursor = conn.cursor()
        t = (scenario,)
        cursor.execute('SELECT scenario_id FROM urbansim_output.parent_scenario WHERE scenario_name=%s', t)
        scenario_id = cursor.fetchone()
        cursor.execute('SELECT parent_scenario_id FROM urbansim_output.parent_scenario WHERE scenario_name=%s', t)
        parent_scenario_id = cursor.fetchone()
    finally:
        # close the connection even if a query fails
        conn.close()

    for year in rng:
        if year == 0 and scenario_id[0] == 1:
            # base-year tables are exported only for the first scenario
            for x in ['parcels', 'buildings', 'jobs']:
                print('exporting ' + x + str(year) + ' ' + str(scenario_id[0]))
                df = pd.read_hdf('data\\results.h5', 'base/' + x)
                df['parent_scenario_id'] = parent_scenario_id[0]
                df.to_sql(x + '_base', urbansim_connection,
                          schema=default_schema, if_exists='append')
        elif year == rng[len(rng) - 1]:
            # final simulation year: export the detail tables
            for x in ['buildings', 'feasibility', 'jobs']:
                print('exporting ' + x + str(year) + ' ' + str(scenario_id[0]))
                df = pd.read_hdf('data\\results.h5', str(year) + '/' + x)
                if x == 'feasibility':
                    df = df['residential']
                    df.rename(columns={'total_sqft': 'total_sqft_existing_bldgs'},
                              inplace=True)
                    # BUG FIX: the original used Python `or` between two
                    # Series, which raises "truth value of a Series is
                    # ambiguous"; element-wise | is what was intended.
                    df = df[(df.addl_units > 0) | (df.non_residential_sqft > 0)]
                    df['existing_units'] = np.where(
                        df['new_built_units'] == 0,
                        df['total_residential_units'],
                        df['total_residential_units'] - df['addl_units'])
                elif x == 'buildings':
                    df = df[df.new_bldg == 1]
                    df.sch_dev = df.sch_dev.astype(int)
                    df.new_bldg = df.new_bldg.astype(int)
                elif x == 'jobs':
                    # only jobs created during the simulation
                    df = df[df.index > get_max_job_id()]
                df['year'] = year
                df['scenario_id'] = scenario_id[0]
                df['parent_scenario_id'] = parent_scenario_id[0]
                df.to_sql(x, urbansim_connection, schema=default_schema,
                          if_exists='append')
def run(self):
    """Base-year (2010) population setup, executed only when temp/data.h5
    does not exist yet.

    Creates a run-log entry, caches the base population and the
    INS/OTH-to-HHP ratios in the HDF5 store, and writes the 2010 base year
    to the defm schema.

    NOTE(review): the flattened source was ambiguous about the else scope;
    the whole setup is treated as first-run-only since it writes the fixed
    2010 base year (re-running it would duplicate database rows) — confirm
    against the original layout.
    """
    my_file = Path('temp/data.h5')
    if my_file.is_file():
        # Py2-only `print'File exists'` replaced with the form valid on
        # both Python 2 and 3.
        print('File exists')
    else:
        db_run_id = log.new_run()
        run_id = pd.Series([db_run_id])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

        pop = extract.create_df('population', 'population_table')
        pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')

        # ratio of institutional (INS) and other (OTH) group-quarters
        # population to household population (HHP), cached for later years
        pop2 = pop[(pop['type'] == 'HHP')]
        pop2 = pop2.reset_index(drop=False)
        pop2 = pd.DataFrame(pop2['persons'].groupby(
            [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum())
        pop2.rename(columns={'persons': 'persons_sum'}, inplace=True)
        pop2 = pop.join(pop2)
        pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']),
                                 (pop2['persons'] / pop2['persons_sum']), 0)
        rates = pop2[['mildep', 'type', 'rates']]
        rates.to_hdf('temp/data.h5', 'ins_oth_rates', format='table',
                     mode='a')

        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        population_summary = [{'Year': self.year - 1,
                               'Run_id': run_id[0],
                               'Population': pop['persons'].sum(),
                               'mig_out': 0,
                               'mig_in': 0,
                               'deaths': 0,
                               'new_born': 0}]
        summary_df = pd.DataFrame(population_summary)
        summary_df.to_sql(name='population_summary', con=engine,
                          schema='defm', if_exists='append', index=False)

        pop['yr'] = 2010  # base year
        pop['run_id'] = db_run_id
        pop.to_sql(name='population', con=engine, schema='defm',
                   if_exists='append', index=True)
def run(self):
    """Build a per-year table of military group-quarters vs military
    household population and store it under 'mil_pop' in temp/data.h5."""
    out_engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    in_engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # latest run id, persisted for downstream steps
    max_id_query = getattr(sql, 'max_run_id')
    db_run_id = pd.read_sql(max_id_query, out_engine, index_col=None)
    run_id = pd.Series([db_run_id['id'].iloc[0]])
    run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

    tables = util.yaml_to_dict('model_config.yml', 'db_tables')
    dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates')

    gc_query = getattr(sql, 'inc_mil_gc_pop') % (
        tables['inc_pop_table'], run_id[0])
    hh_query = getattr(sql, 'inc_mil_hh_pop') % (
        tables['population_table'], dem_sim_rates.base_population_id[0])

    cohort_index = ['age', 'race_ethn', 'sex']
    pop = pd.read_sql(gc_query, out_engine, index_col=cohort_index)
    pop_mil = pd.read_sql(hh_query, in_engine, index_col=cohort_index)
    # keep only rows flagged as military-dependent
    pop_mil = pop_mil.loc[pop_mil['mildep'] == 'Y']

    pop = pop.join(pop_mil)
    pop.rename(columns={'persons': 'mil_gc_pop',
                        'mil_mildep': 'mil_hh_pop'}, inplace=True)
    pop = pop.reset_index(drop=False)
    pop = pd.DataFrame(
        pop[['mil_gc_pop', 'mil_hh_pop']].groupby([pop['yr']]).sum())
    pop.to_hdf('temp/data.h5', 'mil_pop', mode='a')
def insert_run(db_name, model_run_id, df_results, table_name):
    """Append model results to defm.<table_name>, tagged with the run id.

    db_name: unused here; kept for caller compatibility.
    model_run_id: foreign key into the run-log table.
    df_results: results DataFrame; a run_id column is added for the insert
        and removed again before returning.
    table_name: destination table name inside the defm schema.
    """
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    # Insert prediction in the population table
    df_results['run_id'] = model_run_id  # foreign key to run log table
    df_results.to_sql(name=table_name, con=engine, schema='defm',
                      if_exists='append', index=True)
    # BUG FIX: the original rebound the result of drop() to the local name,
    # so the caller's DataFrame kept the added run_id column despite the
    # "remove run_id" comment; drop it in place instead.
    df_results.drop('run_id', axis=1, inplace=True)
def run(self):
    """Assemble the income summary (wages, unearned income, taxable retail
    sales) per year and append it to defm.emp_summary."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))

    econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')
    trs_rates = extract.create_df('trs', 'trs_table',
                                  rate_id=econ_sim_rates.trs_id[0],
                                  index=['yr'])

    # combine the per-year income components produced by earlier steps
    inc = pd.read_hdf('temp/data.h5', 'hh_income')
    inc = inc.join(pd.read_hdf('temp/data.h5', 'mil_income'))
    inc = inc.join(pd.read_hdf('temp/data.h5', 'ue_income'))
    inc = inc.join(trs_rates)

    inc['unearned_income'] = (inc['Interest'] + inc['Other'] +
                              inc['Public_Assistance'] + inc['Retirement'] +
                              inc['Supplemental_Social_Security'] +
                              inc['Social_Security']).round()
    inc['personal_income'] = (inc['jobs_local_wages'] +
                              inc['wf_outside_wages'] +
                              inc['unearned_income'] +
                              inc['Selfemp_Income'] +
                              inc['military_income']).round()
    # taxable retail sales as a fixed share of personal income
    inc['taxable_retail_sales'] = (inc['personal_income'] *
                                   inc['trs_pct']).round()

    keep_cols = ['labor_force', 'unemployed', 'work_force',
                 'work_force_outside', 'work_force_local', 'jobs_local',
                 'jobs_total', 'jobs_external', 'avg_wage',
                 'jobs_total_wages', 'jobs_local_wages',
                 'jobs_external_wages', 'wf_outside_wages',
                 'military_income', 'unearned_income', 'Selfemp_Income',
                 'personal_income', 'taxable_retail_sales']
    inc = inc[keep_cols]

    inc['run_id'] = pd.read_hdf('temp/data.h5', 'run_id')[0]
    inc.to_sql(name='emp_summary', con=engine, schema='defm',
               if_exists='append', index=True)
def new_run(name='runs'):
    """Insert a run-log row recording the rate versions for this model run.

    Creates the defm.<name> table on first use, inserts one row with the
    rate versions read from model_config.yml, and returns the new row's
    auto-generated primary key. Also ensures the results/ directory exists.
    """
    Base = declarative_base()
    table_name = name

    # ORM mapping for the run-log table; built dynamically so the table
    # name can vary per caller.
    class Run(Base):
        __tablename__ = table_name
        __table_args__ = {'schema': 'defm'}
        # define columns for the table
        id = Column(Integer, primary_key=True)
        base_rate_version = Column(Integer)
        birth_rate_version = Column(Integer)
        death_rate_version = Column(Integer)
        migration_rate_version = Column(Integer)
        householder_rate_version = Column(Integer)

    #metadata = MetaData(schema="defm")
    # results directory is created as a side effect of starting a run
    db_dir = 'results/'
    if not os.path.exists(db_dir):
        os.makedirs(db_dir)
    # route unqualified table names into the defm schema
    engine = create_engine(
        get_connection_string(
            "model_config.yml", 'output_database')).execution_options(schema_translate_map={
                None: "defm",  # no schema name -> "defm"
            })
    Base.metadata.schema = 'defm'
    # create the log table only if it does not exist yet
    if not engine.has_table(table_name, schema='defm'):
        Base.metadata.create_all(engine)
    db_session = sessionmaker(bind=engine)
    session = db_session()
    # Rate versions from yml file
    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    # Insert versions in database
    model_run = Run(base_rate_version=rate_versions['population'],
                    birth_rate_version=rate_versions['birth'],
                    death_rate_version=rate_versions['death'],
                    migration_rate_version=rate_versions['migration'],
                    householder_rate_version=rate_versions['householder'])
    session.add(model_run)
    session.commit()
    # primary key assigned by the database on commit
    run_id = model_run.id
    return run_id
def create_df(data_type, db_table, pivot=False,
              index=['age', 'race_ethn', 'sex']):
    """
    Create pandas DataFrame from a database SQL query, selecting the base
    population or rate versions to be used in the model.

    Args:
        data_type : string
            type of data (e.g. birth, migration, population); also names
            the query template in sql.py and the rate-version key
        db_table : string
            database table name key
        pivot : boolean, optional (default False)
            pivot the result (migration rates arrive as 4 rate columns)
        index : list, optional
            columns for the MultiIndex; None keeps the default index

    Returns:
        df_sql_result : pandas DataFrame
            SQL query result
    """
    # connect to the input database using SQLAlchemy
    engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # rate versions and table names for the current model come from the yml
    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    tables = util.yaml_to_dict('model_config.yml', 'db_tables')

    # build the query from sql.py using the table name and rate version
    query = getattr(sql, data_type) % (tables[db_table],
                                       rate_versions[data_type])
    df_sql_result = pd.read_sql(query, engine)

    # special case for migration rates: 4 rate columns
    # (domestic in, domestic out, foreign in, foreign out)
    if pivot:
        df_sql_result = util.apply_pivot(df_sql_result)

    # create MultiIndex on cohort attributes when requested
    if index is not None:
        df_sql_result = df_sql_result.set_index(index)
    return df_sql_result
def run(self):
    """Compute non-wage ('unearned') income totals by year and income type,
    apply the annual income growth rate after 2014, store the result under
    'ue_income' and append it to defm.non_wage_income."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))

    econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')
    pop = pd.read_hdf('temp/data.h5', 'pop')
    rates = extract.create_df('inc_shares', 'inc_shares_table',
                              rate_id=econ_sim_rates.inc1_id[0],
                              index=['yr', 'age_cat'])

    # total income per cohort = per-capita income * persons * share
    rates = rates.join(pop)
    rates['totals'] = rates['income'] * rates['persons'] * rates['share']
    rates = rates.reset_index(drop=False)

    # linear growth after 2014 at the annual income growth rate (aigr)
    aigr_table = extract.create_df('aigr', 'aigr_table',
                                   rate_id=econ_sim_rates.aigr_id[0],
                                   index=None)
    rates['multiplier'] = 0
    rates.loc[rates['yr'] > 2014, ['multiplier']] = (
        aigr_table.aigr[0] * (rates['yr'] - 2014))
    rates['totals'] = (rates['totals'] +
                       rates['totals'] * rates['multiplier'])

    rates = pd.DataFrame(rates['totals'].groupby(
        [rates['yr'], rates['income_type']]).sum())
    rates = rates.reset_index(drop=False)
    # one column per income type, keyed by year
    rates = pd.pivot_table(rates, values='totals', index=['yr'],
                           columns=['income_type'])

    rates.rename(columns={'intp': 'Interest',
                          'oip': 'Other',
                          'pap': 'Public_Assistance',
                          'retp': 'Retirement',
                          'ssip': 'Supplemental_Social_Security',
                          'ssp': 'Social_Security',
                          'semp': 'Selfemp_Income'}, inplace=True)
    rates = rates[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                   'Supplemental_Social_Security', 'Social_Security',
                   'Selfemp_Income']]

    rates.to_hdf('temp/data.h5', 'ue_income')
    rates['run_id'] = pd.read_hdf('temp/data.h5', 'run_id')[0]
    rates.to_sql(name='non_wage_income', con=engine, schema='defm',
                 if_exists='append', index=True)
def run(self):
    """Write one simulated year's population detail tables plus a summary
    row to the defm schema."""
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))
    run_id = pd.read_hdf('temp/data.h5', 'run_id')[0]

    pop = pd.read_hdf('temp/data.h5', 'pop')
    mig_out = pd.read_hdf('temp/data.h5', 'mig_out')
    mig_in = pd.read_hdf('temp/data.h5', 'mig_in')
    dead_pop = pd.read_hdf('temp/data.h5', 'dead_pop')
    new_born = pd.read_hdf('temp/data.h5', 'new_born')

    # build the summary row before yr/run_id columns are stamped on
    summary = pd.DataFrame([{
        'Year': self.year,
        'Run_id': run_id,
        'Population': pop['persons'].sum(),
        'mig_out': mig_out['mig_Dout'].sum() + mig_out['mig_Fout'].sum(),
        'mig_in': mig_in['mig_Din'].sum() + mig_in['mig_Fin'].sum(),
        'deaths_hhp_non_mil': dead_pop['deaths_hhp_non_mil'].sum(),
        'new_born': new_born['new_born'].sum()}])

    # stamp the simulated year and run id on every detail table
    for frame in (pop, mig_out, mig_in, dead_pop, new_born):
        frame['yr'] = self.year
        frame['run_id'] = run_id

    pop.to_sql(name='population', con=engine, schema='defm',
               if_exists='append', index=True)
    mig_out.to_sql(name='mig_out', con=engine, schema='defm',
                   if_exists='append', index=True)
    mig_in.to_sql(name='mig_in', con=engine, schema='defm',
                  if_exists='append', index=True)
    dead_pop.to_sql(name='dead_pop', con=engine, schema='defm',
                    if_exists='append', index=True)
    new_born.to_sql(name='new_born', con=engine, schema='defm',
                    if_exists='append', index=True)

    summary.to_sql(name='population_summary', con=engine, schema='defm',
                   if_exists='append', index=False)
def create_df(data_type, db_table, pivot=False):
    """
    Create a cohort-indexed pandas DataFrame (age, race_ethn, sex) from a
    database SQL query, selecting the base population or rate versions to
    be used in the model.

    Args:
        data_type : string
            type of data (e.g. birth, migration, population); also names
            the query template in sql.py and the rate-version key
        db_table : string
            database table name key
        pivot : boolean, optional (default False)
            pivot the result (migration rates arrive as 4 rate columns)

    Returns:
        df_sql_result : pandas DataFrame
            SQL query result
    """
    # connect to the input database using SQLAlchemy
    engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # rate versions and table names for the current model come from the yml
    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    tables = util.yaml_to_dict('model_config.yml', 'db_tables')

    # build the query from sql.py using the table name and rate version
    query = getattr(sql, data_type) % (tables[db_table],
                                       rate_versions[data_type])
    df_sql_result = pd.read_sql(query, engine)

    # special case for migration rates: 4 rate columns
    # (domestic in, domestic out, foreign in, foreign out)
    if pivot:
        df_sql_result = util.apply_pivot(df_sql_result)

    # create MultiIndex on cohort attributes
    return df_sql_result.set_index(['age', 'race_ethn', 'sex'])
def test_get_connection_string(self):
    """get_connection_string builds the expected SQLAlchemy URL from db.yml."""
    compare_string = 'mssql+pyodbc://user_name:a_password@sqlserverdatabase/a_database_name?driver=SQL+Server+Native+Client+11.0'
    self.assertEqual(database.get_connection_string('db.yml', 'in_db'),
                     compare_string)
    # Reference copy of the expected db.yml contents.
    # NOTE(review): cfg is not used by the assertion above — confirm
    # whether it belongs in a fixture instead.
    cfg = {
        'in_db': {
            'sql_alchemy_driver': 'mssql+pyodbc',
            'driver': 'SQL+Server+Native+Client+11.0',
            'host': 'sqlserverdatabase',
            'database': 'a_database_name',
            # BUG FIX: key was misspelled 'port:' (stray colon inside the
            # string); 'port' matches the out_db entry below.
            'port': '',
            'user': '******',
            'password': '******'},
        'out_db': {
            'sql_alchemy_driver': 'postgresql',
            'driver': '',
            'host': '',
            'database': '',
            'port': '',
            'user': '',
            'password': ''}
    }
def run(self):
    """First-year initialization of the simulation scratch file.

    On the first simulated year (temp/data.h5 does not exist yet): create a
    run-log entry, cache the demographic simulation rates, base population
    and INS/OTH-to-HHP rates, and write the base year to the defm schema.
    In subsequent years the file exists and the setup is skipped, per the
    original '# only first year' comment (re-running would duplicate
    database rows).
    """
    # create file only first year, exists in subsequent years
    my_file = Path('temp/data.h5')
    if my_file.is_file():
        # Py2-only `print'File exists'` replaced with the form valid on
        # both Python 2 and 3.
        print('File exists')
    else:  # only first year
        db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id)
        run_id = pd.Series([db_run_id])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

        dem_sim_rates = extract.create_df('dem_sim_rates',
                                          'dem_sim_rates_table',
                                          rate_id=self.dem_id, index=None)
        dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a')

        pop = extract.create_df('population', 'population_table',
                                rate_id=dem_sim_rates.base_population_id[0])
        pop.to_hdf('temp/data.h5', 'pop', mode='a')

        # ratio of INS and OTH to HHP, kept constant across the simulation
        rates = cp.compute_ins_oth_rate(pop)
        rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a')

        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        population_summary = [{'Year': self.year - 1,
                               'Run_id': run_id[0],
                               'Population': pop['persons'].sum(),
                               'mig_out': 0,
                               'mig_in': 0,
                               'deaths_hhp_non_mil': 0,
                               'new_born': 0}]
        summary_df = pd.DataFrame(population_summary)
        summary_df.to_sql(name='population_summary', con=engine,
                          schema='defm', if_exists='append', index=False)

        pop['yr'] = self.year - 1  # base year precedes the first sim year
        pop['run_id'] = db_run_id
        pop.to_sql(name='population', con=engine, schema='defm',
                   if_exists='append', index=True)
default_emfac_vmt = pd.read_csv(input_path) default_emfac_vmt['emfac_vehicle_class'] = default_emfac_vmt.apply( lambda x: x.vehicle_class.upper() + ' - ' + x.fuel.upper(), axis=1) calendar_year = default_emfac_vmt['calendar_year'].min() # RESET THE INDEX default_emfac_vmt = default_emfac_vmt[['emfac_vehicle_class', 'vmt']].set_index('emfac_vehicle_class') default_emfac_vmt.ix['LDA - GAS']['vmt'] = \ default_emfac_vmt.ix['LDA - GAS']['vmt'] + default_emfac_vmt.ix['LDA - ELEC']['vmt'] default_emfac_vmt.ix['LDT1 - GAS']['vmt'] = \ default_emfac_vmt.ix['LDT1 - GAS']['vmt'] + default_emfac_vmt.ix['LDT1 - ELEC']['vmt'] # READ IN THE VEHICLE CLASSES FROM THE DATABASE sql_in_engine = create_engine(get_connection_string("dbconfig.yml", 'in_db')) emfac_class_mapping = pd.read_sql(emfac_vehicle_class_sql, sql_in_engine, index_col='emfac_vehicle_class') # JOIN DATA AND CLEAN-UP ELECTRIC VEHICLE emfac_vmt = default_emfac_vmt.join(emfac_class_mapping, how='outer') emfac_vmt = emfac_vmt[~emfac_vmt.index.isin(['LDA - ELEC', 'LDT1 - ELEC'])] emfac_vmt.fillna(0, inplace=True) emfac_vmt.reset_index(inplace=True) emfac_vmt.sort(columns="emfac_vehicle_class_id", inplace=True) emfac_vmt.set_index('emfac_vehicle_class_id', inplace=True) # GROUP BY AND GET PERCENTAGE BY GROUP class_map = pd.read_sql(class_map_sql,
# Run the residential development simulation and persist new buildings.
import models
import utils
import orca
from sqlalchemy import create_engine
from pysandag.database import get_connection_string

# simulate feasibility + residential construction for 2016-2050
orca.run(
    [
        "feasibility",            # compute development feasibility
        "residential_developer"   # build residential buildings
    ],
    iter_vars=range(2016, 2051))

mssql_engine = create_engine(
    get_connection_string('data\config.yml', 'mssql_db'))

new_buildings = orca.get_table('buildings').to_frame()
new_buildings = new_buildings.reset_index(drop=False)
# keep only buildings with ids above a fixed threshold — presumably the
# last id of the base-year inventory; confirm against the input data
new_buildings = new_buildings.loc[(new_buildings['building_id'] > 2889578)]
new_buildings['run_id'] = 1
new_buildings['run_desc'] = 'random'
new_buildings.to_sql(name='urbansim_lite_output', con=mssql_engine,
                     schema='urbansim', if_exists='append', index=False)
#'employment_controls', #'fee_schedule', #'household_controls' #'households', #'jobs', #'nodes', #'parcels', #'parks', #'scheduled_development_event', #'schools', #'transit', #'zoning_allowed_use', #'zoning', ] sql_in_engine = create_engine(get_connection_string("dbconfig.yml", 'in_db')) sql_out_engine = create_engine(get_connection_string("dbconfig.yml", 'out_db')) schema = datasets['schema'] metadata = MetaData(bind=sql_out_engine, schema=schema) ##PROCESS SELECTED DATASETS for key in selected: dataset = datasets[key] print ">>> {0}".format(key) ##INPUT QUERY in_query_non_spatial = dataset['in_query_non_spatial'] ##Pandas Data Frame for non-spatial data df_non_spatial = pd.read_sql(in_query_non_spatial, sql_in_engine, index_col= dataset['index_col'])
from __future__ import print_function from sqlalchemy import create_engine from pysandag.database import get_connection_string import pandas as pd postgres_engine = create_engine( get_connection_string("config.yml", 'postgres_database')) db_connection_string = get_connection_string('config.yml', 'mssql_db') mssql_engine = create_engine(db_connection_string) parcels_sql = ''' SELECT parcel_id ,jurisdiction_id ,jurisdiction_name ,building_type_id ,capacity ,residential_units ,total_cap ,num_of_bldgs ,distance_to_coast FROM urbansim.urbansim.input_residential_capacity ''' households_sql = ''' SELECT yr AS year ,sum(households) AS hh FROM urbansim.urbansim.household_control GROUP BY yr ORDER BY yr
from bokeh.io import curdoc, gridplot from bokeh.layouts import row, widgetbox, column from bokeh.models import ColumnDataSource, LabelSet, Plot, DataRange1d, LinearAxis, Grid, LassoSelectTool, WheelZoomTool, SaveTool, ResetTool from bokeh.models.widgets import Slider, TextInput from bokeh.plotting import figure, output_file, show from bokeh.charts import Bar, output_file, show from bokeh.models.glyphs import HBar from bokeh.models import ( ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label, CategoricalColorMapper, ranges ) from bokeh.layouts import layout from forecast import util from db import sql defm_engine = create_engine(get_connection_string("model_config.yml", 'output_database')) db_connection_string = database.get_connection_string('model_config.yml', 'in_db') sql_in_engine = create_engine(db_connection_string) in_query = getattr(sql, 'max_run_id') db_run_id = pd.read_sql(in_query, defm_engine, index_col=None) run_id = db_run_id['id'].iloc[0] results_sql = '''SELECT "Population" as pop_py ,"Run_id" ,"Year" ,deaths_hhp_non_mil as deaths_py ,mig_in - mig_out as net_mig_py
# --- Fragment (whitespace-mangled; reconstructed formatting) ---------------
# Extraction queries for the urbansim model inputs: network, land use,
# agents, controls and zoning, all read from the urbansim MSSQL database.
from sqlalchemy import create_engine
from pysandag.database import get_connection_string
import pandas as pd

urbansim_engine = create_engine(get_connection_string("configs/dbconfig.yml", 'urbansim_database'))

# Network: nodes and weighted edges (distance as edge weight).
nodes_sql = 'SELECT node as node_id, x, y, on_ramp FROM urbansim.nodes'
edges_sql = 'SELECT from_node as [from], to_node as [to], distance as [weight] FROM urbansim.edges'

# Land use: parcels and buildings (COALESCE fills missing counts/years).
parcels_sql = 'SELECT parcel_id, luz_id, parcel_acres as acres, zoning_id, centroid.STX as x, centroid.STY as y, distance_to_coast, distance_to_freeway FROM urbansim.parcels'
buildings_sql = 'SELECT building_id, parcel_id, development_type_id as building_type_id, COALESCE(residential_units, 0) as residential_units, residential_sqft, COALESCE(non_residential_sqft,0) as non_residential_sqft, 0 as non_residential_rent_per_sqft, COALESCE(year_built, -1) year_built, COALESCE(stories, 1) as stories FROM urbansim.buildings'

# Agents: households and jobs, plus sqft-per-job factors.
households_sql = 'SELECT household_id, building_id, persons, age_of_head, income, children FROM urbansim.households'
jobs_sql = 'SELECT job_id, building_id, sector_id FROM urbansim.jobs'
building_sqft_per_job_sql = 'SELECT luz_id, development_type_id, sqft_per_emp FROM urbansim.building_sqft_per_job'

# Scheduled development pipeline.
scheduled_development_events_sql = """SELECT scheduled_development_event_id, parcel_id, development_type_id as building_type_id ,year_built, sqft_per_unit, residential_units, non_residential_sqft ,improvement_value, res_price_per_sqft, nonres_rent_per_sqft as non_residential_rent_per_sqft ,COALESCE(stories,1) as stories FROM urbansim.scheduled_development_event"""

# Amenity point layers from GIS.
schools_sql = """SELECT objectID as id, Shape.STX as x ,Shape.STY as y FROM gis.schools WHERE SOCType IN ('Junior High Schools (Public)','K-12 Schools (Public)','Preschool','Elemen Schools In 1 School Dist.
(Public)','Elementary Schools (Public)','Intermediate/Middle Schools (Public)','High Schools (Public)','Private')"""
parks_sql = """SELECT subparcel as park_id, shape.STCentroid().STX x, shape.STCentroid().STY y FROM gis.landcore WHERE lu IN (7207,7210,7211,7600,7601,7604,7605)"""
transit_sql = 'SELECT x, y, stopnum FROM gis.transit_stops'

# Control totals from the ISAM DEFM tables, pinned to a specific
# demographic/economic version pair.
household_controls_sql = """SELECT yr as [year], hh_income_id as income_quartile, hh FROM isam.defm.households WHERE dem_version = 'S0021' and eco_version = '001' AND yr >= 2015"""
employment_controls_sql = """SELECT yr as [year], jobs as number_of_jobs, sector_id FROM isam.defm.jobs WHERE dem_version = 'S0021' and eco_version = '001' AND yr >= 2015"""

# Zoning rules.
zoning_allowed_uses_sql = """SELECT development_type_id, zoning_id FROM urbansim.zoning_allowed_use ORDER BY development_type_id, zoning_id"""
fee_schedule_sql = """SELECT development_type_id, development_fee_per_unit_space_initial FROM urbansim.fee_schedule"""
zoning_sql = """SELECT zoning_id, max_dua, max_building_height as max_height, max_far FROM urbansim.zoning"""

# Materialize the frames.
# NOTE(review): only the first three reads are visible here — the remaining
# queries are presumably read below this chunk.
nodes_df = pd.read_sql(nodes_sql, urbansim_engine, index_col='node_id')
edges_df = pd.read_sql(edges_sql, urbansim_engine)
parcels_df = pd.read_sql(parcels_sql, urbansim_engine, index_col='parcel_id')
# --- Fragment (whitespace-mangled; reconstructed formatting) ---------------
# Bokeh dashboard setup reading DEFM run results; near-duplicate of a
# sibling script, but note it reads `db_run_id['max']` (not `['id']`) and
# selects `deaths` (not `deaths_hhp_non_mil`) — confirm which is current.
# NOTE(review): several names are re-imported below (`ColumnDataSource`,
# `Slider`, `output_file`, `show`) — the duplicates are redundant.
from bokeh.io import curdoc, gridplot
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource, LabelSet, Plot, DataRange1d, LinearAxis, Grid, LassoSelectTool, WheelZoomTool, SaveTool, ResetTool
from bokeh.models.widgets import Slider, TextInput
from bokeh.plotting import figure, output_file, show
from bokeh.charts import Bar, output_file, show
from bokeh.models.glyphs import HBar
from bokeh.models import (ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label, CategoricalColorMapper, ranges)
from bokeh.layouts import layout
from forecast import util
from db import sql

# NOTE(review): `create_engine`, `get_connection_string`, `database` and `pd`
# are used below but not imported in this fragment — presumably imported
# above this chunk; verify before running.
defm_engine = create_engine(
    get_connection_string("model_config.yml", 'output_database'))
db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
sql_in_engine = create_engine(db_connection_string)

# Look up the most recent run id from the output database.
in_query = getattr(sql, 'max_run_id')
db_run_id = pd.read_sql(in_query, defm_engine, index_col=None)
run_id = db_run_id['max'].iloc[0]

# NOTE(review): this triple-quoted string is still open at the end of this
# chunk — its terminator (and the rest of the query) are below.
results_sql = '''SELECT "Population" as pop_py
    ,"Run_id"
    ,"Year"
    ,deaths as deaths_py
    ,mig_in - mig_out as net_mig_py
# --- Fragment (whitespace-mangled; reconstructed formatting) ---------------
import numpy as np
import pandas as pd
from pysandag.database import get_connection_string
from sqlalchemy import create_engine
from urbansim.models.dcm import unit_choice

urbansim_engine = create_engine(get_connection_string("../postgresql/dbconfig.yml", 'in_db'))  #, legacy_schema_aliasing=False)


def random_allocate_agents_by_geography(agents, containers, geography_id_col, containers_units_col):
    """Allocate agents (e.g., households, jobs) to a container (e.g., buildings)
    based on the number of units available in each container. The agent and
    container unit totals are controled to a geography.

    :param agents: A dataframe with agents to be assigned.
    :param containers: A dataframe to which the agents will be assigned
    :param geography_id_col: The column id in both input dataframes for
        identifying the control geography zones
    :param containers_units_col: The column in the container dataframe
        enumerating number of slots in container for agents
    :type agents: pandas.DataFrame
    :type containers: pandas.DataFrame
    :type geography_id_col: string
    :type containers_units_col: string
    :return: Summary dataframe of allocation
    :rtype: pandas.DataFrame
    """
    # Per-zone bookkeeping: demand, supply and unplaced residual, one row per
    # unique control-geography id found in the agents frame.
    # NOTE(review): `np.int` is deprecated (removed in NumPy >= 1.20) — should
    # become plain `int` when this file is next touched.
    audit_df = pd.DataFrame(
        data=np.zeros((len(np.unique(agents[geography_id_col])), 3), dtype=np.int)
        ,index=np.unique(agents[geography_id_col])
        ,columns=['demand','supply','residual'])
    # Containers that still have capacity, largest capacity first.
    empty_units = containers[containers[containers_units_col] > 0][containers_units_col].sort_values(ascending=False)
    # NOTE(review): function body continues beyond this chunk.