示例#1
0
    def run(self):

        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print 'File exists'
        else:
            db_run_id = log.new_run()
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id', mode='a')
            pop = extract.create_df('population', 'population_table')
            pop.to_hdf('temp/data.h5', 'pop', format='table', mode='a')

            pop2 = pop[(pop['type'] == 'HHP')]
            pop2 = pop2.reset_index(drop=False)

            pop2 = pd.DataFrame(pop2['persons'].groupby(
                [pop2['age'], pop2['race_ethn'], pop2['sex']]).sum())
            pop2.rename(columns={'persons': 'persons_sum'}, inplace=True)

            pop2 = pop.join(pop2)
            pop2['rates'] = np.where(pop2['type'].isin(['INS', 'OTH']),
                                     (pop2['persons'] / pop2['persons_sum']),
                                     0)

            rates = pop2[['mildep', 'type', 'rates']]
            rates.to_hdf('temp/data.h5',
                         'ins_oth_rates',
                         format='table',
                         mode='a')

            engine = create_engine(
                get_connection_string("model_config.yml", 'output_database'))
            population_summary = []
            population_summary.append({
                'Year': self.year - 1,
                'Run_id': run_id[0],
                'Population': pop['persons'].sum(),
                'mig_out': 0,
                'mig_in': 0,
                'deaths': 0,
                'new_born': 0
            })

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary',
                              con=engine,
                              schema='defm',
                              if_exists='append',
                              index=False)
            pop['yr'] = 2010
            pop['run_id'] = db_run_id
            pop.to_sql(name='population',
                       con=engine,
                       schema='defm',
                       if_exists='append',
                       index=True)
示例#2
0
    def run(self):

        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print'File exists'
        else:
            db_run_id = log.new_run(name='emp_run_log')
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id',  mode='a')
            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
            sql_in_engine = create_engine(db_connection_string)

            rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
            tables = util.yaml_to_dict('model_config.yml', 'db_tables')
            in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], rate_versions['inc_pop'])
            in_query2 = getattr(sql, 'inc_pop_mil') % (tables['population_table'], rate_versions['population'])

            pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])
            pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])

            pop = pop.join(pop_mil)
            pop['persons'] = (pop['persons'] - pop['mil_mildep'])
            pop = pop.reset_index(drop=False)

            pop['age_cat'] = ''
            pop.loc[pop['age'].isin(list(range(0, 5))), ['age_cat']] = '00_04'
            pop.loc[pop['age'].isin(list(range(5, 10))), ['age_cat']] = '05_09'
            pop.loc[pop['age'].isin(list(range(10, 15))), ['age_cat']] = '10_14'
            pop.loc[pop['age'].isin(list(range(15, 18))), ['age_cat']] = '15_17'
            pop.loc[pop['age'].isin(list(range(18, 20))), ['age_cat']] = '18_19'
            pop.loc[pop['age'].isin(list(range(20, 21))), ['age_cat']] = '20_20'
            pop.loc[pop['age'].isin(list(range(21, 22))), ['age_cat']] = '21_21'
            pop.loc[pop['age'].isin(list(range(22, 25))), ['age_cat']] = '22_24'
            pop.loc[pop['age'].isin(list(range(25, 30))), ['age_cat']] = '25_29'
            pop.loc[pop['age'].isin(list(range(30, 35))), ['age_cat']] = '30_34'
            pop.loc[pop['age'].isin(list(range(35, 40))), ['age_cat']] = '35_39'
            pop.loc[pop['age'].isin(list(range(40, 45))), ['age_cat']] = '40_44'
            pop.loc[pop['age'].isin(list(range(45, 50))), ['age_cat']] = '45_49'
            pop.loc[pop['age'].isin(list(range(50, 55))), ['age_cat']] = '50_54'
            pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59'
            pop.loc[pop['age'].isin(list(range(60, 62))), ['age_cat']] = '60_61'
            pop.loc[pop['age'].isin(list(range(62, 65))), ['age_cat']] = '62_64'
            pop.loc[pop['age'].isin(list(range(65, 67))), ['age_cat']] = '65_66'
            pop.loc[pop['age'].isin(list(range(67, 70))), ['age_cat']] = '67_69'
            pop.loc[pop['age'].isin(list(range(70, 75))), ['age_cat']] = '70_74'
            pop.loc[pop['age'].isin(list(range(75, 80))), ['age_cat']] = '75_79'
            pop.loc[pop['age'].isin(list(range(80, 85))), ['age_cat']] = '80_84'
            pop.loc[pop['age'].isin(list(range(85, 103))), ['age_cat']] = '85_99'

            pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat'], pop['sex'], pop['race_ethn']]).sum())
            print pop.head()
            pop.to_hdf('temp/data.h5', 'pop', mode='a')
示例#3
0
    def run(self):

        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print'File exists'
        else:
            db_run_id = log.new_run(name='inc_run_log')
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id',  mode='a')
            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            db_connection_string = database.get_connection_string('model_config.yml', 'in_db')
            sql_in_engine = create_engine(db_connection_string)

            rate_versions = util.yaml_to_dict('model_config.yml', 'rate_versions')
            tables = util.yaml_to_dict('model_config.yml', 'db_tables')
            in_query = getattr(sql, 'inc_pop') % (tables['inc_pop_table'], rate_versions['inc_pop'])
            in_query2 = getattr(sql, 'inc_pop_mil') % (tables['population_table'], rate_versions['population'])

            pop = pd.read_sql(in_query, engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])
            pop_mil = pd.read_sql(in_query2, sql_in_engine, index_col=['age', 'race_ethn', 'sex', 'mildep'])

            pop = pop.join(pop_mil)
            pop['persons'] = (pop['persons'] - pop['mil_mildep'])
            pop = pop.reset_index(drop=False)
            pop = pop[pop['age'] >= 18]

            pop['age_cat'] = ''

            pop.loc[pop['age'].isin(list(range(18, 25))), ['age_cat']] = '18_24'
            pop.loc[pop['age'].isin(list(range(25, 35))), ['age_cat']] = '25_34'
            pop.loc[pop['age'].isin(list(range(35, 45))), ['age_cat']] = '35_44'
            pop.loc[pop['age'].isin(list(range(45, 55))), ['age_cat']] = '45_54'
            pop.loc[pop['age'].isin(list(range(55, 60))), ['age_cat']] = '55_59'
            pop.loc[pop['age'].isin(list(range(60, 65))), ['age_cat']] = '60_64'
            pop.loc[pop['age'].isin(list(range(65, 75))), ['age_cat']] = '65_74'
            pop.loc[pop['age'].isin(list(range(75, 103))), ['age_cat']] = '75_99'

            pop = pd.DataFrame(pop['persons'].groupby([pop['yr'], pop['age_cat']]).sum())

            pop.to_hdf('temp/data.h5', 'pop', mode='a')
示例#4
0
    def run(self):
        # create file only first year, exists in subsequent years
        my_file = Path('temp/data.h5')
        if my_file.is_file():
            print'File exists'
        else: # only first year
            db_run_id = log.new_run(dem_id=self.dem_id, econ_id=self.econ_id)
            run_id = pd.Series([db_run_id])
            run_id.to_hdf('temp/data.h5', 'run_id',  mode='a')

            dem_sim_rates = extract.create_df('dem_sim_rates', 'dem_sim_rates_table',
                                              rate_id=self.dem_id, index=None)

            dem_sim_rates.to_hdf('temp/data.h5', 'dem_sim_rates',  mode='a')

            pop = extract.create_df('population', 'population_table', rate_id=dem_sim_rates.base_population_id[0])
            pop.to_hdf('temp/data.h5', 'pop', mode='a')

            # Create function here and test
            # to get ratio of INS and OTH to HHP to keep constant
            rates = cp.compute_ins_oth_rate(pop)

            rates.to_hdf('temp/data.h5', 'ins_oth_rates', mode='a')

            engine = create_engine(get_connection_string("model_config.yml", 'output_database'))
            population_summary = []
            population_summary.append({'Year': self.year - 1,
                                       'Run_id': run_id[0],
                                       'Population': pop['persons'].sum(),
                                       'mig_out': 0,
                                       'mig_in': 0,
                                       'deaths_hhp_non_mil': 0,
                                       'new_born': 0})

            summary_df = pd.DataFrame(population_summary)
            summary_df.to_sql(name='population_summary', con=engine, schema='defm', if_exists='append', index=False)
            pop['yr'] = self.year - 1
            pop['run_id'] = db_run_id
            pop.to_sql(name='population', con=engine, schema='defm', if_exists='append', index=True)
示例#5
0
文件: defm.py 项目: SANDAG/pydefm
# Housekeeping stuff

# measure script time
start_time = time.time()

# change to current directory to find .yml input config file
full_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
os.chdir(os.path.dirname(full_path))

# set console display to show MultiIndex for every row
pd.set_option('display.multi_sparse', False)

# rate versions to result database & return primary key for table
# db_run_id = log.new_run('model_summary.db')
db_run_id = log.new_run('defm.db')

years = util.yaml_to_dict('model_config.yml', 'years')

# Load rates for all years: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort), 'rate', 'year'
#   pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out

birth_rates = extract.create_df('birth', 'rate_table')
death_rates = extract.create_df('death', 'rate_table')
mig_rates = extract.create_df('migration', 'rate_table', pivot=True)


# Load base population: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'
示例#6
0
# Housekeeping stuff

# measure script time
start_time = time.time()

# change to current directory to find .yml input config file
full_path = os.path.abspath(inspect.getfile(inspect.currentframe()))
os.chdir(os.path.dirname(full_path))

# set console display to show MultiIndex for every row
pd.set_option('display.multi_sparse', False)

# rate versions to result database & return primary key for table
# db_run_id = log.new_run('model_summary.db')
db_run_id = log.new_run()

years = util.yaml_to_dict('model_config.yml', 'years')

# Load rates for all years: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort), 'rate', 'year'
#   pivot migration DataFrame w 4 rates: domestic in & out, foreign in & out

birth_rates = extract.create_df('birth', 'rate_table')
death_rates = extract.create_df('death', 'rate_table')
mig_rates = extract.create_df('migration', 'rate_table', pivot=True)

# Load base population: SQL query to pandas DataFrame
#   columns:  'age', 'race_ethn', 'sex' (cohort),
#   'gq.type', 'mildep', 'persons', 'households'