Example #1
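            # (the top of this example is cut off on the source page; the
            # snippet resumes inside read_acs's per-year loop)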
            df['year'] = year
            dfs.append(df)
    df = pd.concat(dfs)

    return df


# sum-aggregate every column whose name starts with prefix, grouped by index
def aggregate(df, prefix, index):
    return df.groupby(index).agg(
        {c: 'sum' for c in df.columns if c.startswith(prefix)})


if __name__ == "__main__":
    engine = util.create_engine()
    index = ['geoid', 'year']

    race_table = 'C02003'
    race_columns = {
        'race_count_total': 1,
        'race_count_white': 3,
        'race_count_black': 4,
        'race_count_asian': 6
    }
    race_agg = read_acs(race_table, race_columns, engine)
    race_agg.set_index(index, inplace=True)

    hispanic_table = 'B03003'
    hispanic_columns = {'race_count_hispanic': 3}
    hispanic_agg = read_acs(hispanic_table, hispanic_columns, engine)
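Example #1 breaks off after the Hispanic counts are read. A minimal continuation, assuming the intent was to index hispanic_agg the same way and join the two frames on the shared (geoid, year) index; the join and the variable name acs are illustrative, not part of the original snippet:

    hispanic_agg.set_index(index, inplace=True)

    # combine race and Hispanic-origin counts into one frame keyed by geoid and year
    acs = race_agg.join(hispanic_agg)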
Example #2
File: data.py Project: dssg/drain
    def run(self):
        return util.create_engine()
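These two lines are only the run() method; in data.py it sits inside a class, presumably a drain step. A minimal sketch of that surrounding context, assuming drain's Step base class (the class name CreateEngine is illustrative):

from drain import step, util


class CreateEngine(step.Step):
    # running this step simply returns the SQLAlchemy engine produced by
    # drain's util.create_engine() helper
    def run(self):
        return util.create_engine()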
Example #3
#!/usr/bin/python
from drain import util
import pandas as pd
from drain import data

engine = util.create_engine()

# read building permits from the database: concatenate the street fields into a
# single address, and normalize the permit type (strip its first nine characters,
# lowercase it, and replace '/' with spaces)
building_permits = pd.read_sql("""
    select street_number || ' ' || street_direction || ' ' ||
           street_name || ' ' || suffix as address,
           issue_date,
           lower(replace(substring(permit_type from 10), '/', ' ')) as permit_type
    from input.building_permits
    where issue_date is not null
""", engine)

# expand permit_type into one indicator column per distinct permit type
data.binarize(building_permits,
              {'permit_type': building_permits.permit_type.unique()},
              all_classes=True)

# write the prepared table back to the aux schema
db = util.PgSQLDatabase(engine)
db.to_sql(frame=building_permits, name='building_permits',
          if_exists='replace', index=False, schema='aux')
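For readers without drain installed, the binarize call above turns permit_type into one indicator column per distinct permit type. A rough pandas-only equivalent, offered as a sketch rather than drain's actual implementation (the function name binarize_permit_type is illustrative):

import pandas as pd


def binarize_permit_type(df):
    # one boolean column per distinct permit type, then drop the original column
    dummies = pd.get_dummies(df['permit_type'], prefix='permit_type')
    return df.drop(columns='permit_type').join(dummies)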