Пример #1
0
    def run(self):
        """Build the employment base population and cache it in temp/data.h5.

        Does nothing if temp/data.h5 already exists.  Otherwise: logs a new
        run, pulls incremental population and military population from the
        output/input databases, nets military-dependent persons out of total
        persons, labels each record with an age category, aggregates to
        (yr, age_cat, sex, race_ethn) and stores the result under key 'pop'.
        """
        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print('File exists')
        else:
            db_run_id = log.new_run(name='emp_run_log')
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id', mode='a')
            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
            sql_in_engine = create_engine(db_connection_string)

            rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
            tables = util.yaml_to_dict('model_config.yml', 'db_tables')
            in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], rate_versions['inc_pop'])
            in_query2 = getattr(sql, 'inc_pop_mil') % (tables['population_table'], rate_versions['population'])

            pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])
            pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])

            # Net military-dependent persons out of the total population.
            pop = pop.join(pop_mil)
            pop['persons'] = (pop['persons'] - pop['mil_mildep'])
            pop = pop.reset_index(drop=False)

            # Age-category labels as (inclusive lower bound, exclusive upper
            # bound, label).  Ages outside all ranges keep the '' default,
            # exactly as in the original per-range assignments.
            age_cats = [
                (0, 5, '00_04'), (5, 10, '05_09'), (10, 15, '10_14'),
                (15, 18, '15_17'), (18, 20, '18_19'), (20, 21, '20_20'),
                (21, 22, '21_21'), (22, 25, '22_24'), (25, 30, '25_29'),
                (30, 35, '30_34'), (35, 40, '35_39'), (40, 45, '40_44'),
                (45, 50, '45_49'), (50, 55, '50_54'), (55, 60, '55_59'),
                (60, 62, '60_61'), (62, 65, '62_64'), (65, 67, '65_66'),
                (67, 70, '67_69'), (70, 75, '70_74'), (75, 80, '75_79'),
                (80, 85, '80_84'), (85, 103, '85_99')]
            pop['age_cat'] = ''
            for lower, upper, label in age_cats:
                pop.loc[pop['age'].isin(list(range(lower, upper))), ['age_cat']] = label

            pop = pd.DataFrame(pop['persons'].groupby(
                [pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum())
            print(pop.head())
            pop.to_hdf('temp/data.h5', 'pop', mode='a')
Пример #2
0
    def run(self):
        """Assemble the 18+ civilian population by year and broad age group
        and cache it in temp/data.h5 under key 'pop'."""
        out_engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        in_engine = create_engine(
            database.get_connection_string('model_config.yml', 'in_db'))

        # Latest run id from the output database, cached for later tasks.
        db_run_id = pd.read_sql(getattr(sql, 'max_run_id'),
                                out_engine,
                                index_col=None)
        run_id = pd.Series([db_run_id['max'].iloc[0]])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

        rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
        tables = util.yaml_to_dict('model_config.yml', 'db_tables')

        pop_query = getattr(sql,
                            'inc_pop') % (tables['inc_pop_table'], run_id[0])
        mil_query = getattr(sql, 'inc_mil_hh_pop') % (
            tables['population_table'], rate_versions['population'])

        cohort_index = ['age', 'race_ethn', 'sex', 'mildep']
        pop = pd.read_sql(pop_query, out_engine, index_col=cohort_index)
        pop_mil = pd.read_sql(mil_query, in_engine, index_col=cohort_index)

        # Net military household population out of total persons, then keep
        # only the adult (18+) records.
        pop = pop.join(pop_mil)
        pop['persons'] = pop['persons'] - pop['mil_mildep']
        pop = pop.reset_index(drop=False)
        pop = pop[pop['age'] >= 18]

        # Broad adult age groups as [lower, upper) -> label.
        pop['age_cat'] = ''
        groups = ((18, 25, '18_24'), (25, 35, '25_34'), (35, 45, '35_44'),
                  (45, 55, '45_54'), (55, 60, '55_59'), (60, 65, '60_64'),
                  (65, 75, '65_74'), (75, 103, '75_99'))
        for lower, upper, label in groups:
            pop.loc[pop['age'].isin(list(range(lower, upper))),
                    ['age_cat']] = label

        pop = pd.DataFrame(
            pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum())

        pop.to_hdf('temp/data.h5', 'pop', mode='a')
Пример #3
0
    def run(self):
        """Compute total non-wage income by year and income type and append
        it to defm.non_wage_income.

        Joins income-share rates to the cached population, totals
        income * persons * share per (yr, income_type), pivots income types
        into columns and tags each row with the cached run id.
        """
        engine = create_engine(get_connection_string("model_config.yml", 'output_database'))

        pop = pd.read_hdf('temp/data.h5', 'pop')
        inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', index=['yr', 'age_cat'])
        inc_type_rates = inc_type_rates.join(pop)
        inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share'])
        inc_type_rates = inc_type_rates.reset_index(drop=False)

        inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum())

        inc_type_rates = inc_type_rates.reset_index(drop=False)
        inc_type_rates = pd.pivot_table(inc_type_rates, values='totals',
                            index=['yr'],
                            columns=['income_type'])

        # Map database income-type codes to report column names in a single
        # rename call instead of six separate ones.
        inc_type_rates.rename(columns={'intp': 'Interest',
                                       'oip': 'Other',
                                       'pap': 'Public_Assistance',
                                       'retp': 'Retirement',
                                       'ssip': 'Supplemental_Social_Security',
                                       'ssp': 'Social_Security'}, inplace=True)

        inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                                         'Supplemental_Social_Security', 'Social_Security']]

        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]

        inc_type_rates['run_id'] = run_id
        inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)
Пример #4
0
    def run(self):
        """Write the year's demographic component tables and a one-row
        population summary to the defm schema."""
        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        run_id = pd.read_hdf('temp/data.h5', 'run_id')[0]

        # Ordered (hdf key, destination table) pairs; write order matches
        # the original explicit call sequence.
        pairs = [('pop', 'population'), ('mig_out', 'mig_out'),
                 ('mig_in', 'mig_in'), ('dead_pop', 'dead_pop'),
                 ('new_born', 'new_born')]
        frames = dict((key, pd.read_hdf('temp/data.h5', key))
                      for key, _ in pairs)

        # One-row summary of this year's components of change.
        population_summary = [
            {'Year': self.year,
             'Run_id': run_id,
             'Population': frames['pop']['persons'].sum(),
             'mig_out': (frames['mig_out']['mig_Dout'].sum() +
                         frames['mig_out']['mig_Fout'].sum()),
             'mig_in': (frames['mig_in']['mig_Din'].sum() +
                        frames['mig_in']['mig_Fin'].sum()),
             'deaths': frames['dead_pop']['deaths'].sum(),
             'new_born': frames['new_born']['new_born'].sum()}]

        # Tag every table with the simulation year and run id, then append
        # each to its database table.
        for key, table_name in pairs:
            df = frames[key]
            df['yr'] = self.year
            df['run_id'] = run_id
            df.to_sql(name=table_name, con=engine, schema='defm',
                      if_exists='append', index=True)

        summary_df = pd.DataFrame(population_summary)
        summary_df.to_sql(name='population_summary', con=engine,
                          schema='defm', if_exists='append', index=False)
Пример #5
0
def new_run(name='runs', run_id=None, econ_id=0, dem_id=0):
    """Insert a new run-log row into defm.<name> and return its id.

    Creates the table in the 'defm' schema on first use.  The run_id
    argument is accepted for interface compatibility but is not used; the
    returned id always comes from the freshly inserted row.
    """
    Base = declarative_base()

    class Run(Base):
        # Run-log row: autoincrement id plus the scenario ids used.
        __tablename__ = name
        __table_args__ = {'schema': 'defm'}
        id = Column(Integer, primary_key=True)
        economic_scenario_id = Column(Integer)
        demographic_scenario_id = Column(Integer)

    # Route unqualified table names into the 'defm' schema.
    engine = create_engine(
        get_connection_string(
            "model_config.yml",
            'output_database')).execution_options(schema_translate_map={
                None: "defm",
            })
    Base.metadata.schema = 'defm'
    if not engine.has_table(name, schema='defm'):
        Base.metadata.create_all(engine)

    session = sessionmaker(bind=engine)()

    # Insert the new run row and hand back its generated primary key.
    model_run = Run(economic_scenario_id=econ_id,
                    demographic_scenario_id=dem_id)
    session.add(model_run)
    session.commit()
    return model_run.id
Пример #6
0
    def run(self):
        """Split total jobs into SANDAG sectors, attach annual pay, and
        persist the result to HDF5 (key 'sectoral') and defm.sectors."""
        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))

        sectoral_share = extract.create_df('sectoral_share',
                                           'sectoral_share_table',
                                           index=['yr', 'sandag_sector'])
        sectoral_pay = extract.create_df('sectoral_pay',
                                         'sectoral_pay_table',
                                         index=['yr', 'sandag_sector'])

        jobs = pd.read_hdf('temp/data.h5', 'jobs')

        # Allocate total jobs to sectors by each sector's share.
        jobs = jobs[['jobs_total']]
        jobs = jobs.join(sectoral_share, how='right')
        jobs['sector_jobs'] = (jobs['jobs_total'] * jobs['share']).round()
        # Keyword form: the positional axis argument (drop([...], 1)) was
        # deprecated and removed in pandas 2.0.
        jobs = jobs.drop(columns=['jobs_total'])

        jobs = jobs.join(sectoral_pay)
        jobs['tot_ann_job_pay'] = (jobs['sector_jobs'] *
                                   jobs['annual_pay']).round()

        jobs.to_hdf('temp/data.h5', 'sectoral', mode='a')
        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]
        jobs['run_id'] = run_id

        jobs.to_sql(name='sectors',
                    con=engine,
                    schema='defm',
                    if_exists='append',
                    index=True)
Пример #7
0
def to_database(scenario=' ', rng=range(0, 0), urbansim_connection=get_connection_string("configs/dbconfig.yml", 'urbansim_database'),
                default_schema='urbansim_output'):
    """ Export urbansim results from data\\results.h5 to the database.

        scenario:
            scenario name used to look up scenario_id and parent_scenario_id
        rng:
            years to export; should be the same range as the simulation
            period minus the first and last year.
        urbansim_connection:
            sql connection, default is for urbansim_database
        default_schema:
            The schema name under which to save the data, default is urbansim_output
    """
    # NOTE(review): database credentials are hard-coded; move to config.
    conn = psycopg2.connect(database="urbansim", user="******", password="******", host="socioeca8",
                            port="5432")
    cursor = conn.cursor()
    t = (scenario,)
    cursor.execute('SELECT scenario_id FROM urbansim_output.parent_scenario WHERE scenario_name=%s', t)
    scenario_id = cursor.fetchone()
    cursor.execute('SELECT parent_scenario_id FROM urbansim_output.parent_scenario WHERE scenario_name=%s', t)
    parent_scenario_id = cursor.fetchone()
    conn.close()

    for year in rng:
        if year == 0 and scenario_id[0] == 1:
            # Base-year export, only for the first scenario.
            for x in ['parcels', 'buildings', 'jobs']:

                print('exporting ' + x + str(year) + ' ' + str(scenario_id[0]))

                df = pd.read_hdf('data\\results.h5', 'base/' + x)
                df['parent_scenario_id'] = parent_scenario_id[0]
                df.to_sql(x + '_base', urbansim_connection, schema=default_schema, if_exists='append')
        elif year == rng[len(rng)-1]:
            # Final-year export.
            for x in ['buildings', 'feasibility', 'jobs']:
                print('exporting ' + x + str(year) + ' ' + str(scenario_id[0]))

                df = pd.read_hdf('data\\results.h5', str(year) + '/' + x)
                if x == 'feasibility':
                    df = df['residential']
                    df.rename(columns={'total_sqft': 'total_sqft_existing_bldgs'}, inplace=True)
                    # BUGFIX: element-wise |, not 'or' -- Python 'or' on two
                    # Series raises "truth value of a Series is ambiguous".
                    df = df[(df.addl_units > 0) | (df.non_residential_sqft > 0)]
                    df['existing_units'] = np.where(df['new_built_units'] == 0, df['total_residential_units'],
                                                    df['total_residential_units'] - df['addl_units'])

                elif x == 'buildings':
                    df = df[df.new_bldg == 1]
                    df.sch_dev = df.sch_dev.astype(int)
                    df.new_bldg = df.new_bldg.astype(int)

                elif x == 'jobs':
                    # keep only jobs created during this run
                    df = df[df.index > get_max_job_id()]
                df['year'] = year
                df['scenario_id'] = scenario_id[0]
                df['parent_scenario_id'] = parent_scenario_id[0]

                df.to_sql(x, urbansim_connection, schema=default_schema, if_exists='append')
Пример #8
0
    def run(self):
        """Initialize the base-year population cache in temp/data.h5.

        Runs only when the cache file does not yet exist: logs a new run,
        extracts the base population, derives institutional/other (INS/OTH)
        population rates, and writes the 2010 base year plus a summary row
        to the database.
        """
        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print('File exists')
        else:
            db_run_id = log.new_run()
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id', mode='a')
            pop = extract.create_df('population', 'population_table')
            pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')

            # Household population (HHP) totals per (age, race_ethn, sex)
            # cohort, used as the denominator for the INS/OTH rates below.
            pop2 = pop[(pop['type'] == 'HHP')]
            pop2 = pop2.reset_index(drop=False)

            pop2 = pd.DataFrame(pop2['persons'].groupby(
                [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum())
            pop2.rename(columns={'persons': 'persons_sum'}, inplace=True)

            pop2 = pop.join(pop2)
            # Rates apply only to institutional (INS) and other (OTH)
            # population types; every other type gets a rate of zero.
            pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']),
                                     (pop2['persons'] / pop2['persons_sum']),
                                     0)

            rates = pop2[['mildep', 'type', 'rates']]
            rates.to_hdf('temp/data.h5',
                         'ins_oth_rates',
                         format='table',
                         mode='a')

            engine = create_engine(
                get_connection_string("model_config.yml", 'output_database'))
            # Base-year summary row: no components of change yet.
            population_summary = []
            population_summary.append({
                'Year': self.year - 1,
                'Run_id': run_id[0],
                'Population': pop['persons'].sum(),
                'mig_out': 0,
                'mig_in': 0,
                'deaths': 0,
                'new_born': 0
            })

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary',
                              con=engine,
                              schema='defm',
                              if_exists='append',
                              index=False)
            pop['yr'] = 2010
            pop['run_id'] = db_run_id
            pop.to_sql(name='population',
                       con=engine,
                       schema='defm',
                       if_exists='append',
                       index=True)
Пример #9
0
    def run(self):
        """Aggregate military group-quarters and military household
        population by year and cache the totals under 'mil_pop' in
        temp/data.h5."""
        out_engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        in_engine = create_engine(
            database.get_connection_string('model_config.yml', 'in_db'))

        # Latest run id from the output database, cached for later tasks.
        db_run_id = pd.read_sql(getattr(sql, 'max_run_id'), out_engine,
                                index_col=None)
        run_id = pd.Series([db_run_id['id'].iloc[0]])
        run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

        tables = util.yaml_to_dict('model_config.yml', 'db_tables')

        dem_sim_rates = pd.read_hdf('temp/data.h5', 'dem_sim_rates')

        gc_query = getattr(
            sql, 'inc_mil_gc_pop') % (tables['inc_pop_table'], run_id[0])
        hh_query = getattr(sql, 'inc_mil_hh_pop') % (
            tables['population_table'], dem_sim_rates.base_population_id[0])

        cohort = ['age', 'race_ethn', 'sex']
        pop = pd.read_sql(gc_query, out_engine, index_col=cohort)
        pop_mil = pd.read_sql(hh_query, in_engine, index_col=cohort)

        # Only military-dependent household population is joined in.
        pop_mil = pop_mil.loc[pop_mil['mildep'] == 'Y']
        pop = pop.join(pop_mil)

        pop.rename(columns={'persons': 'mil_gc_pop',
                            'mil_mildep': 'mil_hh_pop'}, inplace=True)

        pop = pop.reset_index(drop=False)

        pop = pd.DataFrame(
            pop[['mil_gc_pop', 'mil_hh_pop']].groupby([pop['yr']]).sum())
        pop.to_hdf('temp/data.h5', 'mil_pop', mode='a')
Пример #10
0
def insert_run(db_name, model_run_id, df_results, table_name):
    """Append df_results to defm.<table_name>, tagged with model_run_id.

    The run_id column is added only for the insert and removed again so the
    caller's DataFrame is left as it was.

    Args:
        db_name: unused; kept for interface compatibility.
        model_run_id: foreign key into the run-log table.
        df_results: pandas DataFrame to persist (its index is written too).
        table_name: destination table name in the defm schema.
    """
    engine = create_engine(
        get_connection_string("model_config.yml", 'output_database'))

    # Insert prediction in the population table
    df_results['run_id'] = model_run_id  # foreign key to run log table
    df_results.to_sql(name=table_name,
                      con=engine,
                      schema='defm',
                      if_exists='append',
                      index=True)
    # BUGFIX: drop in place.  The original rebound a local name to a copy
    # (df_results = df_results.drop('run_id', 1)), which left the caller's
    # DataFrame polluted with the run_id column despite the comment.
    df_results.drop(columns=['run_id'], inplace=True)
Пример #11
0
    def run(self):
        """Combine household, military and unearned income with the taxable
        retail sales rate and append the yearly totals to defm.emp_summary."""
        engine = create_engine(
            get_connection_string("model_config.yml", 'output_database'))
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        trs_rates = extract.create_df('trs',
                                      'trs_table',
                                      rate_id=econ_sim_rates.trs_id[0],
                                      index=['yr'])

        # Join the cached income components onto one frame keyed by year.
        inc = pd.read_hdf('temp/data.h5', 'hh_income')
        for key in ('mil_income', 'ue_income'):
            inc = inc.join(pd.read_hdf('temp/data.h5', key))
        inc = inc.join(trs_rates)

        # Explicit + chain (not a row-wise sum) so NaNs propagate exactly
        # as before.
        inc['unearned_income'] = (inc['Interest'] + inc['Other'] +
                                  inc['Public_Assistance'] +
                                  inc['Retirement'] +
                                  inc['Supplemental_Social_Security'] +
                                  inc['Social_Security']).round()

        inc['personal_income'] = (inc['jobs_local_wages'] +
                                  inc['wf_outside_wages'] +
                                  inc['unearned_income'] +
                                  inc['Selfemp_Income'] +
                                  inc['military_income']).round()

        inc['taxable_retail_sales'] = (inc['personal_income'] *
                                       inc['trs_pct']).round()

        # Column order of the emp_summary table.
        output_columns = [
            'labor_force', 'unemployed', 'work_force', 'work_force_outside',
            'work_force_local', 'jobs_local', 'jobs_total', 'jobs_external',
            'avg_wage', 'jobs_total_wages', 'jobs_local_wages',
            'jobs_external_wages', 'wf_outside_wages', 'military_income',
            'unearned_income', 'Selfemp_Income', 'personal_income',
            'taxable_retail_sales'
        ]
        inc = inc[output_columns]

        inc['run_id'] = pd.read_hdf('temp/data.h5', 'run_id')[0]

        inc.to_sql(name='emp_summary',
                   con=engine,
                   schema='defm',
                   if_exists='append',
                   index=True)
Пример #12
0
def new_run(name='runs'):
    """Insert a row recording the configured rate versions into defm.<name>
    (creating the table on first use) and return the new row's id."""
    Base = declarative_base()

    class Run(Base):
        # Run-log row: autoincrement id plus the rate versions used.
        __tablename__ = name
        __table_args__ = {'schema': 'defm'}
        id = Column(Integer, primary_key=True)
        base_rate_version = Column(Integer)
        birth_rate_version = Column(Integer)
        death_rate_version = Column(Integer)
        migration_rate_version = Column(Integer)
        householder_rate_version = Column(Integer)

    # Make sure the results directory exists before writing anything.
    db_dir = 'results/'
    if not os.path.exists(db_dir):
        os.makedirs(db_dir)

    # Route unqualified table names into the 'defm' schema.
    engine = create_engine(
        get_connection_string(
            "model_config.yml",
            'output_database')).execution_options(schema_translate_map={
                None: "defm",
            })
    Base.metadata.schema = 'defm'
    if not engine.has_table(name, schema='defm'):
        Base.metadata.create_all(engine)

    session = sessionmaker(bind=engine)()

    # Rate versions for this run come from the model configuration file.
    versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    model_run = Run(base_rate_version=versions['population'],
                    birth_rate_version=versions['birth'],
                    death_rate_version=versions['death'],
                    migration_rate_version=versions['migration'],
                    householder_rate_version=versions['householder'])

    session.add(model_run)
    session.commit()
    return model_run.id
Пример #13
0
def create_df(data_type,
              db_table,
              pivot=False,
              index=['age', 'race_ethn', 'sex']):
    """
    Run the configured SQL query for *data_type* and return the result as a
    pandas DataFrame (base population or rate versions used in the model).

    Args:
        data_type : string
            type of data (e.g. birth, migration, population)
        db_table : string
            database table name
        pivot : boolean, optional (default False)
            pivot the migration-rate result (4 rates arrive as columns)
        index : list or None, optional
            columns to set as the (Multi)Index; None keeps the default index

    Returns:
        df_sql_result : pandas DataFrame
            SQL query result
    """

    # SQLAlchemy engine for the input database.
    sql_in_engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # Current rate versions and table names from the model configuration.
    rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    tables = util.yaml_to_dict('model_config.yml', 'db_tables')

    # Fill the query template from sql.py with the table name and version.
    in_query = getattr(sql, data_type) % (tables[db_table],
                                          rate_versions[data_type])

    df_sql_result = pd.read_sql(in_query, sql_in_engine)

    # Migration rates come back with 4 rate columns (domestic/foreign x
    # in/out) and must be pivoted.
    if pivot:
        df_sql_result = util.apply_pivot(df_sql_result)

    # Index on cohort attributes unless the caller opted out with None.
    if index is not None:
        df_sql_result = df_sql_result.set_index(index)

    return df_sql_result
Пример #14
0
    def run(self):
        """Compute non-wage ('unearned') income by year and income type,
        apply the annual income growth rate (AIGR) to post-2014 years,
        cache the result as 'ue_income' and append it to
        defm.non_wage_income.
        """
        engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
        econ_sim_rates = pd.read_hdf('temp/data.h5', 'econ_sim_rates')

        pop = pd.read_hdf('temp/data.h5', 'pop')
        inc_type_rates = extract.create_df('inc_shares', 'inc_shares_table', rate_id=econ_sim_rates.inc1_id[0], index=['yr', 'age_cat'])

        inc_type_rates = inc_type_rates.join(pop)
        inc_type_rates['totals'] = (inc_type_rates['income'] * inc_type_rates['persons'] * inc_type_rates['share'])
        inc_type_rates = inc_type_rates.reset_index(drop=False)

        # Linear growth multiplier applied to years after the 2014 base.
        inc_type_rates['multiplier'] = 0

        aigr_table = extract.create_df('aigr', 'aigr_table', rate_id=econ_sim_rates.aigr_id[0], index=None)

        inc_type_rates.loc[inc_type_rates['yr'] > 2014, ['multiplier']] = (aigr_table.aigr[0] * (inc_type_rates['yr'] - 2014))

        inc_type_rates['totals'] = (inc_type_rates['totals'] + inc_type_rates['totals'] * inc_type_rates['multiplier'])

        inc_type_rates = pd.DataFrame(inc_type_rates['totals'].groupby([inc_type_rates['yr'], inc_type_rates['income_type']]).sum())

        inc_type_rates = inc_type_rates.reset_index(drop=False)
        inc_type_rates = pd.pivot_table(inc_type_rates, values='totals',
                            index=['yr'],
                            columns=['income_type'])

        # Map database income-type codes to report column names in a single
        # rename call instead of seven separate ones.
        inc_type_rates.rename(columns={'intp': 'Interest',
                                       'oip': 'Other',
                                       'pap': 'Public_Assistance',
                                       'retp': 'Retirement',
                                       'ssip': 'Supplemental_Social_Security',
                                       'ssp': 'Social_Security',
                                       'semp': 'Selfemp_Income'}, inplace=True)

        inc_type_rates = inc_type_rates[['Interest', 'Other', 'Public_Assistance', 'Retirement',
                                         'Supplemental_Social_Security', 'Social_Security', 'Selfemp_Income']]

        # mode='a' is the pandas default; made explicit for consistency
        # with the other HDF writes in this file.
        inc_type_rates.to_hdf('temp/data.h5', 'ue_income', mode='a')
        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]

        inc_type_rates['run_id'] = run_id
        inc_type_rates.to_sql(name='non_wage_income', con=engine, schema='defm', if_exists='append', index=True)
Пример #15
0
    def run(self):
        """Write the year's demographic component tables and a one-row
        population summary (population, migration, deaths, births) to the
        defm schema."""
        engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
        run_table = pd.read_hdf('temp/data.h5', 'run_id')
        run_id = run_table[0]
        population_summary = []  # initialize list for population by year
        pop = pd.read_hdf('temp/data.h5', 'pop')
        mig_out = pd.read_hdf('temp/data.h5', 'mig_out')
        mig_in = pd.read_hdf('temp/data.h5', 'mig_in')
        dead_pop = pd.read_hdf('temp/data.h5', 'dead_pop')
        new_born = pd.read_hdf('temp/data.h5', 'new_born')
        population_summary.append({'Year': self.year,
                                   'Run_id': run_id,
                                   'Population': pop['persons'].sum(),
                                   'mig_out': mig_out['mig_Dout'].sum() + mig_out['mig_Fout'].sum(),
                                   'mig_in': mig_in['mig_Din'].sum() + mig_in['mig_Fin'].sum(),
                                   'deaths_hhp_non_mil': dead_pop['deaths_hhp_non_mil'].sum(),
                                   'new_born': new_born['new_born'].sum()})

        # Tag every component table with the simulation year and run id.
        for table in [pop, mig_out, mig_in, dead_pop, new_born]:
            table['yr'] = self.year
            table['run_id'] = run_id

        # Append each component table to its database table.  (A dead,
        # commented-out psycopg2 CSV bulk-insert alternative was removed.)
        pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
        mig_out.to_sql(name='mig_out', con=engine, schema='defm', if_exists='append', index=True)
        mig_in.to_sql(name='mig_in', con=engine, schema='defm', if_exists='append', index=True)
        dead_pop.to_sql(name='dead_pop', con=engine, schema='defm', if_exists='append', index=True)
        new_born.to_sql(name='new_born', con=engine, schema='defm', if_exists='append', index=True)

        summary_df = pd.DataFrame(population_summary)
        summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False)
Пример #16
0
def create_df(data_type, db_table, pivot=False):
    """
    Build a pandas DataFrame of base population or rate-version data
    from a SQL query against the input database.

    Args:
        data_type : string
            type of data (e.g. birth, migration, population)
        db_table : string
            database table name
        pivot : boolean, optional (default False)
            when True, pivot the long-form migration rates so the four
            rates (domestic/foreign x in/out) become columns

    Returns:
        df : pandas DataFrame
            query result indexed on ('age', 'race_ethn', 'sex')
    """

    # SQLAlchemy engine for the input database named in the model config
    engine = create_engine(
        database.get_connection_string('model_config.yml', 'in_db'))

    # rate versions for the current model and the table-name lookups
    versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
    table_names = util.yaml_to_dict('model_config.yml', 'db_tables')

    # query templates live in sql.py; interpolate table name + rate version
    query = getattr(sql, data_type) % (table_names[db_table],
                                       versions[data_type])

    df = pd.read_sql(query, engine)

    # Special case for migration rates: 4 rates arrive in a single column,
    # so widen them into one column per rate
    if pivot:
        df = util.apply_pivot(df)

    # MultiIndex on the cohort attributes used throughout the model
    return df.set_index(['age', 'race_ethn', 'sex'])
Пример #17
0
    def test_get_connection_string(self):
        # Expected SQLAlchemy URL assembled from the 'in_db' section of db.yml.
        compare_string = 'mssql+pyodbc://user_name:a_password@sqlserverdatabase/a_database_name?driver=SQL+Server+Native+Client+11.0'
        self.assertEqual(database.get_connection_string('db.yml', 'in_db'), compare_string)

        # In-memory mirror of the YAML config the assertion above exercises.
        # NOTE(review): the 'in_db' section spells the port key as 'port:'
        # (stray colon), and the credentials appear masked ('******') --
        # presumably scrubbed for publication; verify against the real
        # db.yml.  cfg is never used in the visible portion of this test,
        # which suggests the method body is truncated here.
        cfg = {
                'in_db': {
                    'sql_alchemy_driver': 'mssql+pyodbc',
                    'driver': 'SQL+Server+Native+Client+11.0',
                    'host': 'sqlserverdatabase',
                    'database': 'a_database_name',
                    'port:': '',
                    'user': '******',
                    'password': '******'},
                'out_db': {
                    'sql_alchemy_driver': 'postgresql',
                    'driver': '',
                    'host': '',
                    'database': '',
                    'port': '',
                    'user': '',
                    'password': ''}
        }
Пример #18
0
    def run(self):
        """First-year initialization for the simulation.

        Creates temp/data.h5, registers a new run, loads the demographic
        rates and base population, and writes the base-year population and
        summary row to the output database.  In every subsequent year the
        HDF5 store already exists and this method does nothing.
        """
        # create file only first year, exists in subsequent years
        my_file = Path('temp/data.h5')
        if my_file.is_file():
            # single-argument print() is valid in both Python 2 and 3,
            # unlike the original Python-2-only statement form
            print('File exists')
        else:  # only first year
            # register this run in the log table and stash its id in the store
            db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id)
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id', mode='a')

            # demographic simulation rates for this run's rate version
            dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table',
                                              rate_id=self.dem_id, index=None)
            dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates', mode='a')

            # base population keyed off the rate version's base_population_id
            pop = extract.create_df('population', 'population_table',
                                    rate_id=dem_sim_rates.base_population_id[0])
            pop.to_hdf('temp/data.h5', 'pop', mode='a')

            # ratio of INS and OTH to HHP, held constant over the simulation
            rates = cp.compute_ins_oth_rate(pop)
            rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a')

            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))

            # summary row for the base year (year - 1): no flows have
            # occurred yet, so migration/deaths/births are all zero
            population_summary = [{'Year': self.year - 1,
                                   'Run_id': run_id[0],
                                   'Population': pop['persons'].sum(),
                                   'mig_out': 0,
                                   'mig_in': 0,
                                   'deaths_hhp_non_mil': 0,
                                   'new_born': 0}]

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary', con=engine, schema='defm',
                              if_exists='append', index=False)

            # tag and persist the base-year population
            pop['yr'] = self.year - 1
            pop['run_id'] = db_run_id
            pop.to_sql(name='population', con=engine, schema='defm',
                       if_exists='append', index=True)
default_emfac_vmt = pd.read_csv(input_path)
# EMFAC class label, e.g. 'LDA - GAS' (upper-cased vehicle class + fuel)
default_emfac_vmt['emfac_vehicle_class'] = default_emfac_vmt.apply(
    lambda x: x.vehicle_class.upper() + ' - ' + x.fuel.upper(), axis=1)
calendar_year = default_emfac_vmt['calendar_year'].min()

# RESET THE INDEX
default_emfac_vmt = default_emfac_vmt[['emfac_vehicle_class',
                                       'vmt']].set_index('emfac_vehicle_class')

# Fold electric VMT into the corresponding gasoline rows for LDA and LDT1.
# .loc[row, column] replaces the removed .ix chained indexing, which
# assigned into an intermediate object and could fail to update the frame.
default_emfac_vmt.loc['LDA - GAS', 'vmt'] = \
    default_emfac_vmt.loc['LDA - GAS', 'vmt'] + default_emfac_vmt.loc['LDA - ELEC', 'vmt']
default_emfac_vmt.loc['LDT1 - GAS', 'vmt'] = \
    default_emfac_vmt.loc['LDT1 - GAS', 'vmt'] + default_emfac_vmt.loc['LDT1 - ELEC', 'vmt']

# READ IN THE VEHICLE CLASSES FROM THE DATABASE
sql_in_engine = create_engine(get_connection_string("dbconfig.yml", 'in_db'))

emfac_class_mapping = pd.read_sql(emfac_vehicle_class_sql,
                                  sql_in_engine,
                                  index_col='emfac_vehicle_class')

# JOIN DATA AND CLEAN-UP ELECTRIC VEHICLE
emfac_vmt = default_emfac_vmt.join(emfac_class_mapping, how='outer')
emfac_vmt = emfac_vmt[~emfac_vmt.index.isin(['LDA - ELEC', 'LDT1 - ELEC'])]
emfac_vmt.fillna(0, inplace=True)
emfac_vmt.reset_index(inplace=True)
# DataFrame.sort(columns=...) was removed from pandas; sort_values replaces it
emfac_vmt.sort_values(by="emfac_vehicle_class_id", inplace=True)
emfac_vmt.set_index('emfac_vehicle_class_id', inplace=True)
class_map = pd.read_sql(class_map_sql,
Пример #20
0
import models
import utils
import orca
from sqlalchemy import create_engine
from pysandag.database import get_connection_string

# Run the two model steps for each simulation year 2016-2050:
# development feasibility, then the residential developer.
orca.run(
    [
        "feasibility",  # compute development feasibility
        "residential_developer"  # build residential buildings
    ],
    iter_vars=range(2016, 2051))

# Engine for the MSSQL output database named in the config file.
db_connection_string = get_connection_string('data\config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

# Pull the simulated buildings table and keep only rows with ids above
# 2889578 (i.e. the buildings added during this run).
buildings = (orca.get_table('buildings')
             .to_frame()
             .reset_index(drop=False))
buildings = buildings.loc[buildings['building_id'] > 2889578]

# Tag the output with a run id/description and append to the results table.
buildings['run_id'] = 1
buildings['run_desc'] = 'random'
buildings.to_sql(name='urbansim_lite_output',
                 con=mssql_engine,
                 schema='urbansim',
                 if_exists='append',
                 index=False)
Пример #21
0
    #'employment_controls',
    #'fee_schedule',
    #'household_controls'
    #'households',
    #'jobs',
    #'nodes',
    #'parcels',
    #'parks',
    #'scheduled_development_event',
    #'schools',
    #'transit',
    #'zoning_allowed_use',
    #'zoning',
]

# engines for the input and output databases named in dbconfig.yml
sql_in_engine = create_engine(get_connection_string("dbconfig.yml", 'in_db'))
sql_out_engine = create_engine(get_connection_string("dbconfig.yml", 'out_db'))
# target schema in the output database, taken from the datasets config
schema = datasets['schema']

metadata = MetaData(bind=sql_out_engine, schema=schema)

##PROCESS SELECTED DATASETS
# read each selected dataset's non-spatial query from the input database
for key in selected:
    dataset = datasets[key]
    print ">>> {0}".format(key)

    ##INPUT QUERY
    in_query_non_spatial = dataset['in_query_non_spatial']

    ##Pandas Data Frame for non-spatial data
    # NOTE(review): the loop body appears truncated here -- the frame is
    # read but never written to sql_out_engine in the visible code
    df_non_spatial = pd.read_sql(in_query_non_spatial, sql_in_engine, index_col= dataset['index_col'])
Пример #22
0
from __future__ import print_function
from sqlalchemy import create_engine
from pysandag.database import get_connection_string
import pandas as pd

# engines for the Postgres and MSSQL databases named in config.yml
postgres_engine = create_engine(
    get_connection_string("config.yml", 'postgres_database'))

db_connection_string = get_connection_string('config.yml', 'mssql_db')
mssql_engine = create_engine(db_connection_string)

# parcel-level residential capacity inputs for the simulation
parcels_sql = '''
SELECT  parcel_id
      ,jurisdiction_id
      ,jurisdiction_name
      ,building_type_id
      ,capacity
      ,residential_units
      ,total_cap
      ,num_of_bldgs
      ,distance_to_coast
  FROM urbansim.urbansim.input_residential_capacity
'''

households_sql = '''
    SELECT
        yr AS year
        ,sum(households) AS hh
    FROM urbansim.urbansim.household_control
    GROUP BY yr
    ORDER BY yr
Пример #23
0
from bokeh.io import curdoc, gridplot
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource, LabelSet, Plot, DataRange1d, LinearAxis, Grid, LassoSelectTool, WheelZoomTool, SaveTool, ResetTool
from bokeh.models.widgets import Slider, TextInput
from bokeh.plotting import figure, output_file, show
from bokeh.charts import Bar, output_file, show
from bokeh.models.glyphs import HBar
from bokeh.models import (
    ColumnDataSource, HoverTool, SingleIntervalTicker, Slider, Button, Label,
    CategoricalColorMapper, ranges
)
from bokeh.layouts import layout
from forecast import util
from db import sql

# engine for the DEFM output database (population results)
defm_engine = create_engine(get_connection_string("model_config.yml", 'output_database'))

# engine for the input database
db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
sql_in_engine = create_engine(db_connection_string)


# query template from sql.py returning the most recent model run id
in_query = getattr(sql, 'max_run_id')
db_run_id = pd.read_sql(in_query, defm_engine, index_col=None)

# scalar id of the latest run, used to select results below
run_id = db_run_id['id'].iloc[0]

results_sql = '''SELECT "Population" as pop_py
                        ,"Run_id"
                        ,"Year"
                        ,deaths_hhp_non_mil as deaths_py
                        ,mig_in - mig_out as net_mig_py
Пример #24
0
from sqlalchemy import create_engine
from pysandag.database import get_connection_string
import pandas as pd

# single engine for all urbansim input tables
urbansim_engine = create_engine(get_connection_string("configs/dbconfig.yml", 'urbansim_database'))

# network tables (pandana-style node/edge lists)
nodes_sql = 'SELECT node as node_id, x, y, on_ramp FROM urbansim.nodes'
edges_sql = 'SELECT from_node as [from], to_node as [to], distance as [weight] FROM urbansim.edges'
# land and building inventory
parcels_sql = 'SELECT parcel_id, luz_id, parcel_acres as acres, zoning_id, centroid.STX as x, centroid.STY as y, distance_to_coast, distance_to_freeway FROM urbansim.parcels'
buildings_sql = 'SELECT building_id, parcel_id, development_type_id as building_type_id, COALESCE(residential_units, 0) as residential_units, residential_sqft, COALESCE(non_residential_sqft,0) as non_residential_sqft, 0 as non_residential_rent_per_sqft, COALESCE(year_built, -1) year_built, COALESCE(stories, 1) as stories FROM urbansim.buildings'
# agents
households_sql = 'SELECT household_id, building_id, persons, age_of_head, income, children FROM urbansim.households'
jobs_sql = 'SELECT job_id, building_id, sector_id FROM urbansim.jobs'
building_sqft_per_job_sql = 'SELECT luz_id, development_type_id, sqft_per_emp FROM urbansim.building_sqft_per_job'
# scheduled developments and point-of-interest layers
scheduled_development_events_sql = """SELECT
                                         scheduled_development_event_id, parcel_id, development_type_id as building_type_id
                                         ,year_built, sqft_per_unit, residential_units, non_residential_sqft
                                         ,improvement_value, res_price_per_sqft, nonres_rent_per_sqft as non_residential_rent_per_sqft
                                         ,COALESCE(stories,1) as stories FROM urbansim.scheduled_development_event"""
schools_sql = """SELECT objectID as id, Shape.STX as x ,Shape.STY as y FROM gis.schools WHERE SOCType IN ('Junior High Schools (Public)','K-12 Schools (Public)','Preschool','Elemen Schools In 1 School Dist. (Public)','Elementary Schools (Public)','Intermediate/Middle Schools (Public)','High Schools (Public)','Private')"""
parks_sql = """SELECT subparcel as park_id, shape.STCentroid().STX x, shape.STCentroid().STY y FROM gis.landcore WHERE lu IN (7207,7210,7211,7600,7601,7604,7605)"""
transit_sql = 'SELECT x, y, stopnum FROM gis.transit_stops'
# control totals and zoning rules
household_controls_sql = """SELECT yr as [year], hh_income_id as income_quartile, hh FROM isam.defm.households WHERE dem_version = 'S0021' and eco_version = '001' AND yr >= 2015"""
employment_controls_sql = """SELECT yr as [year], jobs as number_of_jobs, sector_id FROM isam.defm.jobs WHERE dem_version = 'S0021' and eco_version = '001' AND yr >= 2015"""
zoning_allowed_uses_sql = """SELECT development_type_id, zoning_id FROM urbansim.zoning_allowed_use ORDER BY development_type_id, zoning_id"""
fee_schedule_sql = """SELECT development_type_id, development_fee_per_unit_space_initial FROM urbansim.fee_schedule"""
zoning_sql = """SELECT zoning_id, max_dua, max_building_height as max_height, max_far FROM urbansim.zoning"""

# materialize the frames (more reads presumably follow in the original file)
nodes_df = pd.read_sql(nodes_sql, urbansim_engine, index_col='node_id')
edges_df = pd.read_sql(edges_sql, urbansim_engine)
parcels_df = pd.read_sql(parcels_sql, urbansim_engine, index_col='parcel_id')
Пример #25
0
from bokeh.io import curdoc, gridplot
from bokeh.layouts import row, widgetbox, column
from bokeh.models import ColumnDataSource, LabelSet, Plot, DataRange1d, LinearAxis, Grid, LassoSelectTool, WheelZoomTool, SaveTool, ResetTool
from bokeh.models.widgets import Slider, TextInput
from bokeh.plotting import figure, output_file, show
from bokeh.charts import Bar, output_file, show
from bokeh.models.glyphs import HBar
from bokeh.models import (ColumnDataSource, HoverTool, SingleIntervalTicker,
                          Slider, Button, Label, CategoricalColorMapper,
                          ranges)
from bokeh.layouts import layout
from forecast import util
from db import sql

# engine for the DEFM output database (population results)
defm_engine = create_engine(
    get_connection_string("model_config.yml", 'output_database'))

# engine for the input database
db_connection_string = database.get_connection_string('model_config.yml',
                                                      'in_db')
sql_in_engine = create_engine(db_connection_string)

# query template from sql.py returning the most recent model run id
in_query = getattr(sql, 'max_run_id')
db_run_id = pd.read_sql(in_query, defm_engine, index_col=None)

# NOTE(review): reads column 'max' -- confirm the max_run_id query
# aliases its result as 'max' in db/sql.py
run_id = db_run_id['max'].iloc[0]

results_sql = '''SELECT "Population" as pop_py
                        ,"Run_id"
                        ,"Year"
                        ,deaths as deaths_py
                        ,mig_in - mig_out as net_mig_py
Пример #26
0
import numpy as np
import pandas as pd
from pysandag.database import get_connection_string
from sqlalchemy import create_engine
from urbansim.models.dcm import unit_choice

# engine for the urbansim input database named in the postgres config
urbansim_engine = create_engine(get_connection_string("../postgresql/dbconfig.yml", 'in_db'))
#, legacy_schema_aliasing=False)

def random_allocate_agents_by_geography(agents, containers, geography_id_col, containers_units_col):
    """Allocate agents (e.g., households, jobs) to a container (e.g., buildings) based
       on the number of units available in each container. The agent and container unit
       totals are controled to a geography.

    :param agents: A dataframe with agents to be assigned.
    :param containers: A dataframe to which the agents will be assigned
    :param geography_id_col: The column id in both input dataframes for identifying the control geography zones
    :param containers_units_col: The column in the container dataframe enumerating number of slots in container for agents
    :type agents: pandas.DataFrame
    :type containers: pandas.DataFrame
    :type geography_id_col: string
    :type containers_units_col: string
    :return: Summary dataframe of allocation
    :rtype: pandas.DataFrame
    """
    audit_df = pd.DataFrame(
                    data=np.zeros((len(np.unique(agents[geography_id_col])), 3), dtype=np.int)
                    ,index=np.unique(agents[geography_id_col])
                    ,columns=['demand','supply','residual'])
    
    empty_units = containers[containers[containers_units_col] > 0][containers_units_col].sort_values(ascending=False)