Example #1
def dat_crime(fpath=None):
    # Step Zero: Create dat_crime table
    raw_crime(fpath=fpath)
    dedupe_crime()
    src_crime()
    src_crime_table = Table('src_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    dat_crime_table = crime_table('dat_chicago_crimes_all', Base.metadata)
    dat_crime_table.append_column(Column('chicago_crimes_all_row_id', Integer, primary_key=True))
    dat_crime_table.append_column(Column('start_date', TIMESTAMP, server_default=text('CURRENT_TIMESTAMP')))
    dat_crime_table.append_column(Column('end_date', TIMESTAMP, server_default=text('NULL')))
    dat_crime_table.append_column(Column('current_flag', Boolean, server_default=text('TRUE')))
    dat_crime_table.append_constraint(UniqueConstraint('id', 'start_date'))
    dat_crime_table.create(bind=engine, checkfirst=True)
    new_cols = ['start_date', 'end_date', 'current_flag', 'chicago_crimes_all_row_id']
    dat_ins = dat_crime_table.insert()\
        .from_select(
            [c for c in dat_crime_table.columns.keys() if c not in new_cols],
            select([c for c in src_crime_table.columns])
        )
    conn = engine.contextual_connect()
    res = conn.execute(dat_ins)
    cols = crime_master_cols(dat_crime_table)
    master_ins = MasterTable.insert()\
        .from_select(
            [c for c in MasterTable.columns.keys() if c != 'master_row_id'],
            select(cols)\
                .select_from(dat_crime_table)
        )
    conn = engine.contextual_connect()
    res = conn.execute(master_ins)
    return 'DAT crime created'
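
A note on these examples: all of them lean on module-level names that the snippets never define (engine, Base, MasterTable, and helpers such as crime_table, crime_master_cols, raw_crime and sf_crime_table), and they target Python 2 and the pre-1.4 SQLAlchemy API; engine.contextual_connect() was deprecated in SQLAlchemy 1.3 and later removed, with engine.connect() as the modern replacement. A minimal sketch of the setup they appear to assume follows; the connection string and the master table name are guesses, not taken from the original code.

from datetime import date, datetime

from geoalchemy2 import Geometry  # presumably; used by the SF/shapefile examples
from sqlalchemy import (TIMESTAMP, Boolean, Column, DateTime, Integer,
                        Table, UniqueConstraint, and_, create_engine,
                        func, or_, select, text)
from sqlalchemy.exc import NoSuchTableError, SQLAlchemyError
from sqlalchemy.ext.declarative import declarative_base

# Hypothetical DSN; the real one is configured elsewhere in the project.
engine = create_engine('postgresql://localhost:5432/plenario')
Base = declarative_base()

# Assumption: the master table already exists in the database and is
# reflected at import time; its actual name is not shown in these examples.
MasterTable = Table('dat_master', Base.metadata,
                    autoload=True, autoload_with=engine)
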
Example #2
def dat_crime(fpath=None):
    # Step Zero: Create dat_crime table
    raw_crime(fpath=fpath)
    dedupe_crime()
    src_crime()
    src_crime_table = Table('src_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    dat_crime_table = crime_table('dat_chicago_crimes_all', Base.metadata)
    dat_crime_table.append_column(Column('chicago_crimes_all_row_id', Integer, primary_key=True))
    dat_crime_table.append_column(Column('start_date', TIMESTAMP, server_default=text('CURRENT_TIMESTAMP')))
    dat_crime_table.append_column(Column('end_date', TIMESTAMP, server_default=text('NULL')))
    dat_crime_table.append_column(Column('current_flag', Boolean, server_default=text('TRUE')))
    dat_crime_table.append_constraint(UniqueConstraint('id', 'start_date'))
    dat_crime_table.create(bind=engine, checkfirst=True)
    new_cols = ['start_date', 'end_date', 'current_flag', 'chicago_crimes_all_row_id']
    dat_ins = dat_crime_table.insert()\
        .from_select(
            [c for c in dat_crime_table.columns.keys() if c not in new_cols],
            select([c for c in src_crime_table.columns])
        )
    conn = engine.contextual_connect()
    res = conn.execute(dat_ins)
    cols = crime_master_cols(dat_crime_table)
    master_ins = MasterTable.insert()\
        .from_select(
            [c for c in MasterTable.columns.keys() if c != 'master_row_id'],
            select(cols)\
                .select_from(dat_crime_table)
        )
    conn = engine.contextual_connect()
    res = conn.execute(master_ins)
    cleanup_temp_tables()
    return 'DAT crime created'
Example #3
def chg_crime():
    # Step Seven: Find updates
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    src_crime_table = Table('src_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    chg_crime_table = Table('chg_chicago_crimes_all', Base.metadata, 
        Column('id', Integer, primary_key=True),
        extend_existing=True)
    chg_crime_table.drop(bind=engine, checkfirst=True)
    chg_crime_table.create(bind=engine)
    src_cols = [c for c in src_crime_table.columns if c.name not in ['id', 'start_date', 'end_date']]
    dat_cols = [c for c in dat_crime_table.columns if c.name not in ['id', 'start_date', 'end_date']]
    and_args = []
    for s, d in zip(src_cols, dat_cols):
        ors = or_(s != None, d != None)
        ands = and_(ors, s != d)
        and_args.append(ands)
    ins = chg_crime_table.insert()\
          .from_select(
              ['id'],
              select([src_crime_table.c.id])\
                  .select_from(src_crime_table.join(dat_crime_table,
                      src_crime_table.c.id == dat_crime_table.c.id))\
                  .where(or_(
                          and_(dat_crime_table.c.current_flag == True, 
                                and_(or_(src_crime_table.c.id != None, dat_crime_table.c.id != None), 
                                src_crime_table.c.id != dat_crime_table.c.id)),
                          *and_args))
          )
    conn = engine.contextual_connect()
    conn.execute(ins)
    return 'Changes found'
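
The or_(s != None, d != None) wrapper tries to make each column comparison null-aware, but in standard SQL s != d still evaluates to NULL when either side is NULL, so changes where exactly one side is NULL slip through the and_. Note also that the first branch of the where() compares src.id != dat.id after joining on src.id == dat.id, so that branch can never match; the real change detection happens in *and_args. On PostgreSQL a genuinely null-safe inequality can be written with IS DISTINCT FROM, exposed as an operator since SQLAlchemy 1.1; a minimal sketch:

from sqlalchemy import Column, Integer, MetaData, Table

metadata = MetaData()
t = Table('example', metadata,
          Column('a', Integer),
          Column('b', Integer))

# True whenever a and b differ, including when exactly one is NULL;
# renders as "a IS DISTINCT FROM b" on PostgreSQL.
changed = t.c.a.is_distinct_from(t.c.b)
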
Example #4
def new_crime():
    # Step Four: Find New Crimes
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    src_crime_table = Table('src_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    new_crime_table = Table('new_chicago_crimes_all', Base.metadata, 
        Column('id', Integer, primary_key=True),
        extend_existing=True)
    new_crime_table.drop(bind=engine, checkfirst=True)
    new_crime_table.create(bind=engine)
    ins = new_crime_table.insert()\
        .from_select(
            ['id'],
            select([src_crime_table.c.id])\
                .select_from(src_crime_table.join(dat_crime_table, 
                    src_crime_table.c.id == dat_crime_table.c.id, isouter=True))\
                .where(dat_crime_table.c.chicago_crimes_all_row_id == None)
        )
    conn = engine.contextual_connect()
    try:
        conn.execute(ins)
        return 'New records found'
    except TypeError:
        # No new records
        return None
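
The isouter=True join combined with the chicago_crimes_all_row_id == None filter is a classic anti-join: it keeps only source rows that have no counterpart in the dat table. A sketch of the SQL the insert roughly compiles to (exact output depends on the dialect and SQLAlchemy version):

NEW_ROWS_SQL = """
INSERT INTO new_chicago_crimes_all (id)
SELECT src_chicago_crimes_all.id
FROM src_chicago_crimes_all
LEFT OUTER JOIN dat_chicago_crimes_all
    ON src_chicago_crimes_all.id = dat_chicago_crimes_all.id
WHERE dat_chicago_crimes_all.chicago_crimes_all_row_id IS NULL
"""
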
Example #5
def update_dat_crimes():
    # Step Five: Update Main Crime table
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    src_crime_table = Table('src_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    try:
        new_crime_table = Table('new_chicago_crimes_all', Base.metadata, 
            autoload=True, autoload_with=engine, extend_existing=True)
    except NoSuchTableError:
        return None
    excluded_cols = ['end_date', 'current_flag', 'chicago_crimes_all_row_id']
    dat_cols = [c for c in dat_crime_table.columns.keys() if c not in excluded_cols]
    excluded_cols.append('start_date')
    src_cols = [c for c in src_crime_table.columns if c.name not in excluded_cols]
    src_cols.append(text("'%s' AS start_date" % datetime.now().strftime('%Y-%m-%d')))
    ins = dat_crime_table.insert()\
        .from_select(
            dat_cols,
            select(src_cols)\
                .select_from(src_crime_table.join(new_crime_table,
                    src_crime_table.c.id == new_crime_table.c.id))
        )
    conn = engine.contextual_connect()
    conn.execute(ins)
    return 'Crime Table updated'
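
Interpolating today's date into a text() fragment works, but the same column can be produced as a database-computed expression, avoiding string formatting entirely. A sketch of a hypothetical equivalent for the text(...) line above:

from sqlalchemy import Date, cast, func

# Renders roughly as CAST(now() AS DATE) AS start_date, matching the
# interpolated text("'YYYY-MM-DD' AS start_date") used above.
start_date_expr = cast(func.now(), Date).label('start_date')
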
Example #6
def update_master_current_flag():
    # Step Eight: Update end_date and current_flag in master table
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    update = MasterTable.update()\
        .values(current_flag=False, end_date=datetime.now().strftime('%Y-%m-%d'))\
        .where(MasterTable.c.dataset_row_id == dat_crime_table.c.chicago_crimes_all_row_id)\
        .where(dat_crime_table.c.current_flag==False)\
        .where(dat_crime_table.c.end_date==date.today())
    conn = engine.contextual_connect()
    conn.execute(update)
    return 'Master table current flag updated'
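
Because the where() clauses reference a second table, SQLAlchemy renders this as a correlated multi-table UPDATE (UPDATE ... FROM ... on PostgreSQL). Roughly, assuming the master table is named dat_master as in the setup sketch above:

MASTER_FLAG_SQL = """
UPDATE dat_master
SET current_flag = FALSE, end_date = :today
FROM dat_chicago_crimes_all
WHERE dat_master.dataset_row_id
        = dat_chicago_crimes_all.chicago_crimes_all_row_id
  AND dat_chicago_crimes_all.current_flag = FALSE
  AND dat_chicago_crimes_all.end_date = :today
"""
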
Example #7
def update_crime_current_flag():
    # Step Seven: Update end_date and current_flag in crime table
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    chg_crime_table = Table('chg_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    update = dat_crime_table.update()\
        .values(current_flag=False, end_date=datetime.now().strftime('%Y-%m-%d'))\
        .where(dat_crime_table.c.id==chg_crime_table.c.id)\
        .where(dat_crime_table.c.current_flag == True)
    conn = engine.contextual_connect()
    conn.execute(update)
    return 'Crime table current flag updated'
Example #8
def sf_crime(fpath=None, crime_type='violent'):
    #raw_crime = sf_raw_crime(fpath=fpath)
    # Assume for now that there are no duplicates in the raw data, which
    # means we neither run dedupe_crime() nor create src_crime()
    raw_crime_table = Table('raw_sf_crimes_all',
                            Base.metadata,
                            autoload=True,
                            autoload_with=engine,
                            extend_existing=True)
    if crime_type == 'violent':
        categories = ['ASSAULT', 'ROBBERY', 'SEX OFFENSES, FORCIBLE']
    elif crime_type == 'property':
        categories = ['LARCENY/THEFT', 'VEHICLE THEFT', 'BURGLARY',
                      'STOLEN PROPERTY', 'ARSON', 'VANDALISM']
    else:
        # Guard against an unknown crime_type, which would otherwise leave
        # `categories` undefined and fail later with a NameError.
        raise ValueError('Unknown crime_type: {0}'.format(crime_type))
    # Create table "dat_sf_crimes_all", that contains additional fields needed
    # by Plenario, in addition to the raw data
    crime_table = sf_crime_table('sf_{0}_crimes'.format(crime_type),
                                 Base.metadata)
    # Add geom column
    crime_table.append_column(Column('geom', Geometry('POINT', srid=4326)))
    # Add obs_date column
    crime_table.append_column(Column('obs_date', DateTime))
    # Add row_id column
    crime_table.append_column(Column('row_id', Integer, primary_key=True))
    # Constrain (id, start_date) to be unique (?)
    # dat_crime_table.append_constraint(UniqueConstraint('id', 'start_date'))
    crime_table.drop(bind=engine, checkfirst=True)
    crime_table.create(bind=engine)
    new_cols = ['row_id']
    # Insert data from raw_crime_table (which will become src_crime_table
    # once duplicates are checked for)
    dat_ins = crime_table.insert()\
        .from_select(
            [c for c in crime_table.columns.keys() if c not in new_cols],
            select([c for c in raw_crime_table.columns if c.name !=
                'dup_row_id'] + [
                    func.ST_SetSRID(
                        func.ST_MakePoint(raw_crime_table.c['longitude'],
                            raw_crime_table.c['latitude']), 4326) ] + \
                    [ raw_crime_table.c['date'].label('obs_date') ])\
                .where(raw_crime_table.c['category'].in_(categories))
        )
    conn = engine.contextual_connect()
    res = conn.execute(dat_ins)
    return 'Table sf_{0}_crimes created'.format(crime_type)
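
sf_crime_table, like crime_table and crime_master_cols in the Chicago examples, is a helper defined elsewhere in the codebase. Judging by the columns this function touches ('category', 'date', 'longitude', 'latitude'), a hypothetical minimal stand-in could look like this; the real helper presumably mirrors the full raw schema:

from sqlalchemy import Column, DateTime, Float, String, Table

def sf_crime_table(name, metadata):
    # Hypothetical: only the columns the example actually references.
    return Table(name, metadata,
                 Column('category', String),
                 Column('date', DateTime),
                 Column('longitude', Float),
                 Column('latitude', Float),
                 extend_existing=True)
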
Example #9
def dedupe_crime():
    # Step Two: Find duplicate records by ID
    raw_crime_table = Table('raw_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    dedupe_crime_table = Table('dedup_chicago_crimes_all', Base.metadata,
        Column('dup_row_id', Integer, primary_key=True),
        extend_existing=True)
    dedupe_crime_table.drop(bind=engine, checkfirst=True)
    dedupe_crime_table.create(bind=engine)
    ins = dedupe_crime_table.insert()\
        .from_select(
            ['dup_row_id'], 
            select([func.max(raw_crime_table.c.dup_row_id)])\
            .group_by(raw_crime_table.c.id)
        )
    conn = engine.contextual_connect()
    res = conn.execute(ins)
    return 'Raw crime deduplicated'
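
Deduplication keeps one physical row per logical id by taking the highest dup_row_id in each group. A sketch of the SQL the insert roughly compiles to:

DEDUPE_SQL = """
INSERT INTO dedup_chicago_crimes_all (dup_row_id)
SELECT MAX(dup_row_id)
FROM raw_chicago_crimes_all
GROUP BY id
"""
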
Example #10
def src_crime():
    # Step Three: Create New table with unique ids
    raw_crime_table = Table('raw_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    dedupe_crime_table = Table('dedup_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    src_crime_table = crime_table('src_chicago_crimes_all', Base.metadata)
    src_crime_table.drop(bind=engine, checkfirst=True)
    src_crime_table.create(bind=engine)
    ins = src_crime_table.insert()\
        .from_select(
            src_crime_table.columns.keys(),
            select([c for c in raw_crime_table.columns if c.name != 'dup_row_id'])\
                .where(raw_crime_table.c.dup_row_id == dedupe_crime_table.c.dup_row_id)
        )
    conn = engine.contextual_connect()
    conn.execute(ins)
    return 'Source table created'
Example #11
def add_dataset_meta(name, file_name='', human_name='', description='',
    val_attr='', count_q=False, area_q=False, dist_q=False,
    temp_q=False, weighted_q=False, access_q=False,
    voronoi=False, duration='interval', demo=False):
    """ Add infotmation about a dataset in the meta table """
    if human_name == '':
        human_name = name
    meta_table = Table('sf_meta', Base.metadata,
        autoload=True, autoload_with=engine, extend_existing=True)
    row = {'table_name': name, 'file_name': file_name,
        'human_name': human_name,'description': description,
        'last_update': func.current_timestamp(), 'val_attr': val_attr,
        'count_q': count_q, 'area_q': area_q, 'dist_q': dist_q,
        'temp_q': temp_q, 'weighted_q': weighted_q, 'access_q': access_q,
        'voronoi': voronoi, 'duration': duration, 'demo': demo}
    ins = meta_table.insert(row)
    conn = engine.contextual_connect()
    conn.execute(ins)
    return 'Meta information for {0} inserted.'.format(name)
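
A hypothetical call, with all argument values purely illustrative:

add_dataset_meta('sf_violent_crimes',
                 file_name='sf_violent_crimes.csv',
                 human_name='SF violent crimes',
                 description='Violent crime reports for San Francisco',
                 count_q=True,
                 temp_q=True)
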
Example #12
def update_master():
    # Step Six: Update Master table
    dat_crime_table = Table('dat_chicago_crimes_all', Base.metadata, 
        autoload=True, autoload_with=engine, extend_existing=True)
    try:
        new_crime_table = Table('new_chicago_crimes_all', Base.metadata, 
            autoload=True, autoload_with=engine, extend_existing=True)
    except NoSuchTableError:
        return None
    cols = crime_master_cols(dat_crime_table)
    ins = MasterTable.insert()\
        .from_select(
            [c for c in MasterTable.columns.keys() if c != 'master_row_id'],
            select(cols)\
                .select_from(dat_crime_table.join(new_crime_table, 
                    dat_crime_table.c.id == new_crime_table.c.id))
        )
    conn = engine.contextual_connect()
    conn.execute(ins)
    return 'Master updated'
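
Taken together, the step comments suggest a refresh pipeline along the following lines. The chaining is a sketch; the original task runner is not among these examples:

def update_crime(fpath=None):
    raw_crime(fpath=fpath)         # Step One: load the raw data
    dedupe_crime()                 # Step Two: find duplicate rows
    src_crime()                    # Step Three: rebuild the deduped source
    new_crime()                    # Step Four: find ids missing from dat
    update_dat_crimes()            # Step Five: append the new rows
    update_master()                # Step Six: mirror them into the master
    chg_crime()                    # Step Seven: find changed rows
    update_crime_current_flag()    # ...and expire them in the crime table
    update_master_current_flag()   # Step Eight: expire them in the master
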
Example #13
def import_shapefile_timed(fpath, name, force_multipoly=False, proj=4326,
    voronoi=False, duration='interval', start_date=None, end_date=None,
    obs_date_field='obs_date'):
    """Import a shapefile into the PostGIS database

    Keyword arguments:
    fpath -- path to a zipfile to be extracted
    name -- name given to the newly created table
    force_multipoly -- enforce that the geometries are multipolygons
    proj -- source projection spec (EPSG code or Proj4 string)
    voronoi -- compute voronoi triangulations of points
    duration -- 'interval' or 'event'
    start_date -- initial date of a shape's life (works with duration = 'interval')
    end_date -- final date of a shape's life (works with duration = 'interval')
    obs_date_field -- where to find event time info (works with duration =
                      'event')
    """
    # Open the shapefile with fiona.
    with fiona.open('/', vfs='zip://{0}'.format(fpath)) as shp:
        shp_table = shp2table(name, Base.metadata, shp.schema,
            force_multipoly=force_multipoly)
        shp_table.drop(bind=engine, checkfirst=True)
        shp_table.append_column(Column('row_id', Integer, primary_key=True))
        if duration == 'interval':
            shp_table.append_column(Column('start_date', DateTime,\
                default=datetime.min))
            shp_table.append_column(Column('end_date', DateTime,\
                default=datetime.max))
        else:
            shp_table.append_column(Column('obs_date', DateTime,\
                default=datetime.now()))
        # If the geometry is not "point", append a centroid column
        if shp.schema['geometry'].lower() != 'point':
            shp_table.append_column(Column('centroid', Geometry('POINT',
                                    srid=4326)))
        # Add a column and compute Voronoi triangulation, if required
        if shp.schema['geometry'].lower() == 'point' and voronoi:
            pts = [p['geometry']['coordinates'] for p in shp.values()]
            pts = transform_proj(pts, proj, 4326)
            pts_map = dict([[str(i), p] for (i, p) in zip(range(len(pts)), pts)])
            vor_polygons = VoronoiGeoJson_Polygons(pts_map, BoundingBox='W')
            vor_polygons = json.loads(vor_polygons)
            # For matching the polygons to the correct point, we create a
            # dictionary with _domain_id as keys
            vor_polygons_dict = dict(zip(range(len(pts)),\
                ['POLYGON EMPTY']*len(pts)))
            for r in vor_polygons:
                vor_polygons_dict[int(r['properties']['_domain_id'])] =\
                    shape(r['geometry']).wkt
            shp_table.append_column(Column('voronoi', Geometry('POLYGON',
                                    srid=4326)))
        shp_table.create(bind=engine)
        features = []
        count = 0
        num_shapes = len(shp)
        for r in shp:
            # ESRI shapefiles don't declare multipolygons, i.e. the geometry
            # type is "Polygon" even when multipolygons are present.
            # If and when the first multipolygon is encountered, the import
            # restarts with the table re-initialized.
            if not force_multipoly and r['geometry']['type'] == 'MultiPolygon':
                return import_shapefile_timed(fpath, name, force_multipoly=True,
                    proj=proj, voronoi=voronoi, duration=duration,
                    start_date=start_date, end_date=end_date,
                    obs_date_field=obs_date_field)
            row_dict = dict((k.lower(), v) for k, v in r['properties'].iteritems())
            # GeoJSON intermediate representation
            geom_json = json.loads(str(r['geometry']).replace('\'', '"')\
                                   .replace('(', '[').replace(')', ']'))
            # If the projection is not long/lat (WGS84 - EPSG:4326), transform.
            if proj != 4326:
                geom_json['coordinates'] = transform_proj(geom_json['coordinates'], proj, 4326)
            # Shapely intermediate representation, used to obtain the WKT
            geom = shape(geom_json)
            if force_multipoly and r['geometry']['type'] != 'MultiPolygon':
                geom = MultiPolygon([geom])
            row_dict['geom'] = 'SRID=4326;{0}'.format(geom.wkt)
            if shp.schema['geometry'].lower() != 'point':
                row_dict['centroid'] =\
                    'SRID=4326;{0}'.format(geom.centroid.wkt)
            if duration == 'interval':
                if start_date:
                    row_dict['start_date'] = start_date
                elif np.random.rand() < 0.33:
                    row_dict['start_date'] = datetime(2004, 03, 04)
                if end_date:
                    row_dict['end_date'] = end_date
                elif np.random.rand() < 0.33:
                    row_dict['end_date'] = datetime(2009, 07, 06)
            else:
                row_dict['obs_date'] = row_dict[obs_date_field.lower()]
                del row_dict[obs_date_field]
            if shp.schema['geometry'].lower() == 'point' and voronoi:
                row_dict['voronoi'] =\
                    'SRID=4326;{0}'.format(vor_polygons_dict[count])
            features.append(row_dict)
            count += 1
            #if count > 100: break
            # Buffer DB writes
            if not count % 1000 or count == num_shapes:
                try:
                    ins = shp_table.insert(features)
                    conn = engine.contextual_connect()
                    conn.execute(ins)
                except SQLAlchemyError as e:
                    print type(e)
                    print e.orig
                    return "Failed."
                features = []
                print "Inserted {0} shapes in dataset {1}".format(count, name)
    return 'Table {0} created from shapefile'.format(name)
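
A hypothetical invocation; the zip path, table name, source projection, and date are illustrative only:

from datetime import datetime

import_shapefile_timed('/tmp/sf_neighborhoods.zip',
                       'sf_neighborhoods',
                       proj=2227,             # assumed source EPSG code
                       duration='interval',
                       start_date=datetime(2010, 1, 1))
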