FROM tmp.moving_violations a INNER JOIN  source_data.mv_geocodes ON source_data.mv_geocodes.location = a.location
   WHERE a.latitude IS NULL and a.longitude IS NULL
"""

engine.execute(step_2_query)
print("lat and long values updated")

# create geography field with populated lat and long values, and create index
step_3_query = """
UPDATE tmp.moving_violations_add_geocode
SET geography = ST_SetSRID(ST_MakePoint(longitude_2, latitude_2), 4326)::geography
WHERE geography IS NULL AND longitude_2 is not null AND latitude_2 is not null
;

CREATE INDEX IF NOT EXISTS mv_geom_idx ON tmp.moving_violations_add_geocode USING GIST (geography);

"""
engine.execute(step_3_query)
print("geography field created")

# Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
next_tables = add_location_info(engine=engine, target_schema='tmp', target_table='moving_violations_nbh_ward', from_schema='tmp', from_table='moving_violations_add_geocode', partition_by_field='objectid')
print("neighborhood-ward query complete")
next_tables = add_school_info(engine=engine, target_schema='tmp', target_table='moving_violations_schools', from_schema=next_tables[0], from_table=next_tables[1])
print("schools query complete")
next_tables = add_roadway_info(engine=engine, target_schema='tmp', target_table='moving_violations_roadway_info', from_schema=next_tables[0], from_table=next_tables[1], partition_by_field='objectid', within_distance= 10)
print("roadway info query complete")
next_tables = add_intersection_info(engine=engine, target_schema='tmp', target_table='moving_violations_intersection_info', from_schema=next_tables[0], from_table=next_tables[1], partition_by_field='objectid', within_distance= 20)
print("intersection info query complete")
row_count = create_final_table(engine=engine, target_schema = 'analysis_data', target_table='moving_violations', from_schema=next_tables[0], from_table=next_tables[1])
print("final query complete with row count ",row_count)
env = "DEV"
engine = create_postgres_engine(destination="AWS_PostGIS",
                                target_db=dbname,
                                env=env)
db_credentials = get_connection_strings("AWS_PostGIS")

engine.execute(
    'update source_data.census_blocks set geography = ST_MakeValid(geography::geometry)::geography'
)
print('geography updated')

# Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
next_tables = add_location_info(engine=engine,
                                target_schema='tmp',
                                target_table='census_blocks_nbh_ward',
                                from_schema='source_data',
                                from_table='census_blocks',
                                partition_by_field='objectid')
print("neighborhood-ward query complete")
next_tables = add_school_info(engine=engine,
                              target_schema='tmp',
                              target_table='census_blocks_schools',
                              from_schema=next_tables[0],
                              from_table=next_tables[1])
print("schools query complete")
row_count = create_final_table(engine=engine,
                               target_schema='analysis_data',
                               target_table='census_blocks',
                               from_schema=next_tables[0],
                               from_table=next_tables[1])
print("final query complete with row count ", row_count)
示例#3
0
def generate_pulsepoint_analysis_table(AWS_Credentials: dict, **kwargs):

    # if no environment is specified default to dev
    env = kwargs.get('env', None)
    if env == None:
        env = 'DEV'
    env = env.upper()

    # set up RDS and S3 connections, engines, cursors
    region = AWS_Credentials['region']
    engine = create_postgres_engine(destination="AWS_PostGIS", env=env)

    # flag that some records might be duplicate calls for the same incident
    dupe_check_query = """
    DROP TABLE IF EXISTS tmp.pulsepoint_dupe_check;
    CREATE TABLE tmp.pulsepoint_dupe_check 
    AS (
        SELECT DISTINCT a.* , 
            case 
                when b.incident_id is null then 1 
                when a.num_units_responding = 0 and b.num_units_responding >0 then 0 
                when b.unit_status_transport > a.unit_status_transport then 0
                when b.num_units_responding > a.num_units_responding then 0 
                when b.call_received_datetime < a.call_received_datetime then 0
                else 1 end as KEEP_RECORD_FLAG
        FROM source_data.pulsepoint a
        LEFT JOIN source_data.pulsepoint b on a.incident_id <> b.incident_id 
        and date_part('day', a.call_received_datetime - b.call_received_datetime) = 0
        and date_part('hour', a.call_received_datetime - b.call_received_datetime) = 0
        and date_part('month', a.call_received_datetime - b.call_received_datetime) = 0
        and abs(date_part('minute', a.call_received_datetime - b.call_received_datetime)) <=20
        and ST_DWithin(a.geography, b.geography, 100)
        and a.Agency_ID = b.Agency_ID
        and (a.num_units_responding = 0 or a.unit_ids && b.unit_ids)

    ) ;

    CREATE INDEX IF NOT EXISTS pulsepoint_dupe_check_geom_idx ON tmp.pulsepoint_dupe_check USING GIST (geography);
    """

    # then join to the crashes table
    crashes_join_query = """


    DROP TABLE IF EXISTS tmp.pulsepoint_crash_join;
    CREATE TABLE tmp.pulsepoint_crash_join 
    AS (SELECT * 
    FROM (
        SELECT DISTINCT a.* 
        ,concat(a.agency_id, a.incident_id) as Agency_Incident_ID
            ,b.objectid as Crash_Objectid 
            ,b.geography as Crash_Geo
            ,b.total_injuries as Crash_Total_Injuries
            ,b.total_major_injuries as Crash_Total_Major_Injuries 
            ,b.total_minor_injuries as Crash_Total_Minor_Injuries 
            ,(b.bicycle_fatalities + b.pedestrian_fatalities + b.vehicle_fatalities) as Crash_Total_Fatalities
            ,b.bicycle_injuries as Crash_Bike_Injuries
            ,b.vehicle_injuries as Crash_Car_Injuries
            ,b.pedestrian_injuries as Crash_Ped_Injuries
            ,case when b.total_injuries is null or b.total_injuries < a.unit_status_transport then 1 else 0 end as injuries_mismatch
            ,ST_Distance(a.geography, b.geography) as Distance_To_Crash
            ,(b.reportdate at time zone 'America/New_York')  - (a.CALL_RECEIVED_DATETIME at time zone 'America/New_York')  as Time_Between_Crash_And_Report
            ,b.intersectionid as Crash_Intersection_ID
            ,b.block_objectid as Crash_Block_Objectid
            ,Row_Number() over (partition by a.incident_id, a.agency_id order by ST_Distance(a.geography, b.geography)) as Crash_Distance_Rank
            ,Row_Number() over (partition by a.incident_id, a.agency_id order by (b.reportdate at time zone 'America/New_York')  - (a.CALL_RECEIVED_DATETIME at time zone 'America/New_York')) as Crash_Time_Rank
        FROM tmp.pulsepoint_dupe_check a
        LEFT JOIN analysis_data.dc_crashes_w_details b on ST_DWITHIN(a.geography, b.geography, 200) 
            AND cast(b.fromdate as date) =cast((call_received_datetime at time zone 'America/New_York') as date)
            AND (a.CALL_RECEIVED_DATETIME at time zone 'America/New_York')  < (b.reportdate at time zone 'America/New_York') 
        WHERE a.KEEP_RECORD_FLAG = 1
    ) tmp WHERE Crash_Distance_Rank = 1 and (incident_type in ('TC', 'TCE', 'TCS') or (agency_id = '16000' and incident_type in ('TC', 'TCS', 'TCE', 'RES')))
    ) ;

    CREATE INDEX IF NOT EXISTS pulsepoint_crash_join_geom_idx ON tmp.pulsepoint_crash_join USING GIST (geography);

    alter table tmp.pulsepoint_crash_join drop column KEEP_RECORD_FLAG;
    alter table tmp.pulsepoint_crash_join drop column Crash_Distance_Rank;
    """

    # First execute the table-specific queries
    engine.execute(dupe_check_query)
    print("dupe check query complete")

    engine.execute(crashes_join_query)
    print("join to crashes query complete")

    # Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
    next_tables = add_location_info(engine=engine,
                                    target_schema='tmp',
                                    target_table='pulsepoint_nbh_ward',
                                    from_schema='tmp',
                                    from_table='pulsepoint_crash_join',
                                    partition_by_field='Agency_Incident_ID')
    print("neighborhood-ward query complete")
    next_tables = add_school_info(engine=engine,
                                  target_schema='tmp',
                                  target_table='pulsepoint_schools',
                                  from_schema=next_tables[0],
                                  from_table=next_tables[1])
    print("schools query complete")
    next_tables = add_walkscore_info(engine=engine,
                                     target_schema='tmp',
                                     target_table='pulsepoint_walkscore',
                                     from_schema=next_tables[0],
                                     from_table=next_tables[1])
    print("walkscore query complete")
    next_tables = add_roadway_info(engine=engine,
                                   target_schema='tmp',
                                   target_table='pulsepoint_roadway_info',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1],
                                   partition_by_field='Agency_Incident_ID',
                                   within_distance=100)
    print("roadway info query complete")
    next_tables = add_intersection_info(
        engine=engine,
        target_schema='tmp',
        target_table='pulsepoint_intersection_info',
        from_schema=next_tables[0],
        from_table=next_tables[1],
        partition_by_field='Agency_Incident_ID',
        within_distance=60)
    print("intersection info query complete")
    next_tables = is_national_park(engine=engine,
                                   target_schema='tmp',
                                   target_table='pulsepoint_national_park',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1])
    print("national parks info query complete")
    row_count = create_final_table(engine=engine,
                                   target_schema='analysis_data',
                                   target_table='pulsepoint',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1])
    print("final query complete with row count ", row_count)
def generate_crashes_table(AWS_Credentials: dict, **kwargs):

    # if no environment is specified default to dev
    env = kwargs.get('env', None)
    if env == None:
        env = 'DEV'
    env = env.upper()

    # set up RDS and S3 connections, engines, cursors
    region = AWS_Credentials['region']
    engine = create_postgres_engine(destination="AWS_PostGIS", env=env)

    # The queries that are specific to the crash data and are not run anywhere else
    add_columns_query = """
    DROP TABLE IF EXISTS tmp.crash_details;
    CREATE TABLE tmp.crash_details 
    AS (
        SELECT *
            ,CASE WHEN PERSONTYPE = 'Driver' AND AGE >=65 THEN 1 ELSE 0 END AS DRIVERS_OVER_65
            ,CASE WHEN PERSONTYPE = 'Driver' AND AGE <=25 THEN 1 ELSE 0 END AS DRIVERS_UNDER_25
            ,CASE WHEN PERSONTYPE = 'Pedestrian' AND AGE >=65 THEN 1 ELSE 0 END AS PEDS_OVER_65
            ,CASE WHEN PERSONTYPE = 'Pedestrian' AND AGE <=12 THEN 1 ELSE 0 END AS PEDS_UNDER_12
            ,CASE WHEN PERSONTYPE = 'Bicyclist' AND AGE >=65 THEN 1 ELSE 0 END AS BIKERS_OVER_65
            ,CASE WHEN PERSONTYPE = 'Bicyclist' AND AGE <=18 THEN 1 ELSE 0 END AS BIKERS_UNDER_18
            ,CASE WHEN PERSONTYPE = 'Driver' AND LICENSEPLATESTATE <> 'DC' AND LICENSEPLATESTATE <> ' None' THEN 1 ELSE 0 END AS OOS_VEHICLES
            ,CASE WHEN PERSONTYPE = 'Driver' AND INVEHICLETYPE = 'Passenger Car/automobile' THEN 1 ELSE 0 END AS NUM_CARS
            ,CASE WHEN PERSONTYPE = 'Driver' AND INVEHICLETYPE in ('Suv (sport Utility Vehicle)', 'Pickup Truck') THEN 1 ELSE 0 END AS NUM_SUVS_OR_TRUCKS

            ,CASE WHEN PERSONTYPE = 'Pedestrian' AND FATAL='Y' THEN 1 ELSE 0 END AS PED_FATALITIES
            ,CASE WHEN PERSONTYPE = 'Bicyclist' AND FATAL='Y'THEN 1 ELSE 0 END AS BICYCLE_FATALITIES
            ,CASE WHEN PERSONTYPE in ('Driver','Passenger') AND FATAL='Y' THEN 1 ELSE 0 END AS VEHICLE_FATALITIES

            ,CASE WHEN PERSONTYPE = 'Pedestrian' AND (MAJORINJURY='Y' OR MINORINJURY ='Y')THEN 1 ELSE 0 END AS PED_INJURIES
            ,CASE WHEN PERSONTYPE = 'Bicyclist' AND (MAJORINJURY='Y' OR MINORINJURY ='Y') THEN 1 ELSE 0 END AS BICYCLE_INJURIES
            ,CASE WHEN PERSONTYPE in ('Driver','Passenger') AND (MAJORINJURY='Y' OR MINORINJURY ='Y') THEN 1 ELSE 0 END AS VEHICLE_INJURIES
            ,CASE WHEN PERSONTYPE = 'Driver' AND TICKETISSUED ='Y' THEN 1 ELSE 0 END AS DRIVER_TICKETS
            ,CASE WHEN PERSONTYPE = 'Driver' AND SPEEDING ='Y' THEN 1 ELSE 0 END AS DRIVERS_SPEEDING
            ,CASE WHEN PERSONTYPE = 'Driver' AND IMPAIRED ='Y' THEN 1 ELSE 0 END AS DRIVERS_IMPAIRED

            ,CASE WHEN PERSONTYPE = 'Bicyclist' AND TICKETISSUED ='Y' THEN 1 ELSE 0 END AS BICYCLE_TICKETS
            ,CASE WHEN PERSONTYPE = 'Pedestrian' AND TICKETISSUED ='Y'  THEN 1 ELSE 0 END AS PED_TICKETS
            ,CASE WHEN (MAJORINJURY='Y' OR MINORINJURY ='Y') THEN 1 ELSE 0 END AS TOTAL_INJURIES
            ,CASE WHEN MAJORINJURY='Y' THEN 1 ELSE 0 END AS TOTAL_MAJOR_INJURIES
            ,CASE WHEN MINORINJURY ='Y' THEN 1 ELSE 0 END AS TOTAL_MINOR_INJURIES

            ,CASE WHEN PERSONTYPE = 'Driver' THEN 1 ELSE 0 END AS TOTAL_VEHICLES
            ,CASE WHEN PERSONTYPE = 'Pedestrian' THEN 1 ELSE 0 END AS TOTAL_PEDESTRIANS
            ,CASE WHEN PERSONTYPE = 'Bicyclist' THEN 1 ELSE 0 END AS TOTAL_BICYCLISTS
        FROM source_data.crash_details
    )
    """
    group_by_query = """
    DROP TABLE IF EXISTS tmp.crash_details_agg;
    CREATE  TABLE tmp.crash_details_agg 
    AS (
        SELECT 
            CRIMEID
            ,SUM(DRIVERS_OVER_65) AS DRIVERS_OVER_65
            ,SUM(DRIVERS_UNDER_25) AS DRIVERS_UNDER_25
            ,SUM(PEDS_OVER_65) AS PEDS_OVER_65
            ,SUM(PEDS_UNDER_12) AS PEDS_UNDER_12
            ,SUM(BIKERS_OVER_65) AS BIKERS_OVER_65
            ,SUM(BIKERS_UNDER_18) AS BIKERS_UNDER_18
            ,SUM(OOS_VEHICLES) AS OOS_VEHICLES
            ,SUM(NUM_CARS) AS NUM_CARS
            ,SUM(NUM_SUVS_OR_TRUCKS) AS NUM_SUVS_OR_TRUCKS
            ,SUM(PED_INJURIES) AS PEDESTRIAN_INJURIES
            ,SUM(BICYCLE_INJURIES) AS BICYCLE_INJURIES
            ,SUM(VEHICLE_INJURIES) AS VEHICLE_INJURIES
            ,SUM(PED_FATALITIES) AS PEDESTRIAN_FATALITIES
            ,SUM(BICYCLE_FATALITIES) AS BICYCLE_FATALITIES
            ,SUM(VEHICLE_FATALITIES) AS VEHICLE_FATALITIES
            ,SUM(DRIVER_TICKETS) AS DRIVER_TICKETS
            ,SUM(DRIVERS_SPEEDING) AS DRIVERS_SPEEDING
            ,SUM(DRIVERS_IMPAIRED) AS DRIVERS_IMPAIRED
            ,SUM(BICYCLE_TICKETS) AS BICYCLE_TICKETS
            ,SUM(PED_TICKETS) AS PED_TICKETS
            ,SUM(TOTAL_INJURIES) AS TOTAL_INJURIES
            ,SUM(TOTAL_MAJOR_INJURIES) AS TOTAL_MAJOR_INJURIES
            ,SUM(TOTAL_MINOR_INJURIES) AS TOTAL_MINOR_INJURIES
            ,SUM(TOTAL_VEHICLES) AS TOTAL_VEHICLES
            ,SUM(TOTAL_PEDESTRIANS) AS TOTAL_PEDESTRIANS
            ,SUM(TOTAL_BICYCLISTS) AS TOTAL_BICYCLISTS
            ,ARRAY_AGG(PERSONTYPE) AS PERSONTYPE_ARRAY
            ,ARRAY_AGG(INVEHICLETYPE) AS INVEHICLETYPE_ARRAY
            ,ARRAY_AGG(LICENSEPLATESTATE) AS LICENSEPLATESTATE_ARRAY
        FROM tmp.crash_details
        GROUP BY CRIMEID
    ) ;
    create index crime_id on tmp.crash_details_agg (crimeid);
    """

    join_query = """
    DROP TABLE IF EXISTS tmp.crashes_join;
    CREATE TABLE tmp.crashes_join
    AS (
        SELECT 
            a.OBJECTID
                ,a.CRIMEID
                ,a.REPORTDATE
                ,a.FROMDATE
                ,a.TODATE 
                ,a.ADDRESS
                ,a.mpdlatitude
                ,a.mpdlongitude
                ,CASE WHEN b.CRIMEID IS NULL OR b.BICYCLE_INJURIES < (a.MAJORINJURIES_BICYCLIST + a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                    THEN (a.MAJORINJURIES_BICYCLIST + a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                    ELSE b.BICYCLE_INJURIES END AS BICYCLE_INJURIES
                ,CASE WHEN b.CRIMEID IS NULL OR b.VEHICLE_INJURIES < (a.MAJORINJURIES_DRIVER+a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MAJORINJURIESPASSENGER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER)
                    THEN (a.MAJORINJURIES_DRIVER+a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MAJORINJURIESPASSENGER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER)
                    ELSE b.VEHICLE_INJURIES END AS VEHICLE_INJURIES
                ,CASE WHEN b.CRIMEID IS NULL OR b.PEDESTRIAN_INJURIES < (a.MAJORINJURIES_PEDESTRIAN+ a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN)
                    THEN (a.MAJORINJURIES_PEDESTRIAN + a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN)
                    ELSE b.PEDESTRIAN_INJURIES END AS PEDESTRIAN_INJURIES
                ,CASE WHEN b.CRIMEID IS NULL OR b.TOTAL_INJURIES < (a.MAJORINJURIES_PEDESTRIAN+ a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN
                                                                    +a.MAJORINJURIES_DRIVER+a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MAJORINJURIESPASSENGER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER
                                                                    +a.MAJORINJURIES_BICYCLIST + a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                        THEN (a.MAJORINJURIES_PEDESTRIAN+ a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN
                                                                    +a.MAJORINJURIES_DRIVER+a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MAJORINJURIESPASSENGER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER
                                                                    +a.MAJORINJURIES_BICYCLIST + a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                        ELSE b.TOTAL_INJURIES end as TOTAL_INJURIES 

                ,CASE WHEN b.CRIMEID IS NULL OR b.TOTAL_MAJOR_INJURIES < (a.MAJORINJURIES_PEDESTRIAN+
                                                                    +a.MAJORINJURIES_DRIVER+a.MAJORINJURIESPASSENGER
                                                                    +a.MAJORINJURIES_BICYCLIST)
                        THEN (a.MAJORINJURIES_PEDESTRIAN+a.MAJORINJURIES_DRIVER+a.MAJORINJURIESPASSENGER+a.MAJORINJURIES_BICYCLIST)
                        ELSE b.TOTAL_MAJOR_INJURIES end as TOTAL_MAJOR_INJURIES 

                ,CASE WHEN b.CRIMEID IS NULL OR b.TOTAL_MINOR_INJURIES < (a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN
                                                                    +a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER
                                                                    +a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                    THEN (a.MINORINJURIES_PEDESTRIAN + a.UNKNOWNINJURIES_PEDESTRIAN
                                                                    +a.MINORINJURIES_DRIVER+a.UNKNOWNINJURIES_DRIVER+a.MINORINJURIESPASSENGER+a.UNKNOWNINJURIESPASSENGER
                                                                    +a.MINORINJURIES_BICYCLIST + a.UNKNOWNINJURIES_BICYCLIST)
                    ELSE b.TOTAL_MINOR_INJURIES end as TOTAL_MINOR_INJURIES     

                ,CASE WHEN b.CRIMEID IS NULL OR b.BICYCLE_FATALITIES < a.FATAL_BICYCLIST
                    THEN a.FATAL_BICYCLIST 
                    ELSE b.BICYCLE_FATALITIES END AS BICYCLE_FATALITIES
                ,CASE WHEN b.CRIMEID IS NULL OR b.PEDESTRIAN_FATALITIES < a.FATAL_PEDESTRIAN
                    THEN a.FATAL_PEDESTRIAN 
                    ELSE b.PEDESTRIAN_FATALITIES END AS PEDESTRIAN_FATALITIES
                ,CASE WHEN b.CRIMEID IS NULL OR b.VEHICLE_FATALITIES < (a.FATAL_DRIVER+a.FATALPASSENGER)
                    THEN (a.FATAL_DRIVER+a.FATALPASSENGER) 
                    ELSE b.VEHICLE_FATALITIES END AS VEHICLE_FATALITIES
                ,CASE WHEN b.CRIMEID IS NULL or b.DRIVERS_IMPAIRED < a.DRIVERSIMPAIRED THEN a.DRIVERSIMPAIRED ELSE b.DRIVERS_IMPAIRED END AS DRIVERS_IMPAIRED 
                ,CASE WHEN b.CRIMEID IS NULL or b.DRIVERS_SPEEDING < a.SPEEDING_INVOLVED THEN a.SPEEDING_INVOLVED ELSE b.DRIVERS_SPEEDING END AS DRIVERS_SPEEDING 

                ,CASE WHEN b.CRIMEID IS NULL or b.TOTAL_VEHICLES < a.TOTAL_VEHICLES THEN a.TOTAL_VEHICLES ELSE b.TOTAL_VEHICLES END AS TOTAL_VEHICLES 
                ,CASE WHEN b.CRIMEID IS NULL or b.TOTAL_BICYCLISTS < a.TOTAL_BICYCLES THEN a.TOTAL_BICYCLES ELSE b.TOTAL_BICYCLISTS END AS TOTAL_BICYCLISTS 
                ,CASE WHEN b.CRIMEID IS NULL or b.TOTAL_PEDESTRIANS < a.TOTAL_PEDESTRIANS THEN a.TOTAL_PEDESTRIANS ELSE b.TOTAL_PEDESTRIANS END AS TOTAL_PEDESTRIANS 
                ,b.DRIVERS_OVER_65
                ,b.DRIVERS_UNDER_25
                ,b.PEDS_OVER_65
                ,b.PEDS_UNDER_12
                ,b.BIKERS_OVER_65
                ,b.BIKERS_UNDER_18
                ,b.OOS_VEHICLES
                ,b.NUM_CARS
                ,b.NUM_SUVS_OR_TRUCKS
                ,b.DRIVER_TICKETS
                ,b.BICYCLE_TICKETS
                ,b.PED_TICKETS
                ,b.PERSONTYPE_ARRAY
                ,b.INVEHICLETYPE_ARRAY
                ,b.LICENSEPLATESTATE_ARRAY
                ,a.INTAPPROACHDIRECTION
                ,a.LOCATIONERROR 
                ,a.LASTUPDATEDATE
                ,a.BLOCKKEY
                ,a.SUBBLOCKKEY
                ,ST_Force2D(a.geography::geometry) as geography

        FROM source_data.crashes_raw a
        LEFT JOIN tmp.crash_details_agg b on a.CRIMEID = b.CRIMEID
        WHERE date_part('year', a.fromdate) >=2015
    ) ;
    CREATE INDEX crashes_geom_idx ON tmp.crashes_join USING GIST (geography);
    """

    # join in the pulsepoint info
    pulsepoint_join_query = """

    DROP TABLE IF EXISTS tmp.crash_pulsepoint_join;
    CREATE TABLE tmp.crash_pulsepoint_join 
    AS (SELECT * 
    FROM (
        SELECT DISTINCT a.* 
        ,b.Agency_Incident_ID as pp_agency_incident_id
        ,b.unit_status_transport as pp_total_injuries
        ,b.transport_unit_is_amr as pp_total_minor_injuries
        ,b.transport_unit_is_non_amr as pp_total_major_injuries
            ,Row_Number() over (partition by a.objectid order by ST_Distance(a.geography, b.geography)) as PP_Call_Distance_Rank
            ,Row_Number() over (partition by a.objectid order by (a.reportdate at time zone 'America/New_York')  - (b.CALL_RECEIVED_DATETIME at time zone 'America/New_York')) as PP_Call_Time_Rank
        FROM tmp.crashes_join a
        LEFT JOIN analysis_data.pulsepoint b on ST_DWITHIN(a.geography, b.geography, 200) 
            AND cast(fromdate as date) =cast((call_received_datetime at time zone 'America/New_York') as date)
            AND (b.CALL_RECEIVED_DATETIME at time zone 'America/New_York')  < (a.reportdate at time zone 'America/New_York') 
    ) tmp WHERE PP_Call_Distance_Rank = 1
    ) ;

    CREATE INDEX IF NOT EXISTS crash_pulsepoint_join_geom_idx ON tmp.crash_pulsepoint_join USING GIST (geography);

    alter table tmp.crash_pulsepoint_join drop column PP_Call_Distance_Rank;
    """

    # First execute the table-specific queries
    engine.execute(add_columns_query)
    print("add columns query complete")
    engine.execute(group_by_query)
    print("group by query complete")
    engine.execute(join_query)
    print("join query complete")
    engine.execute(pulsepoint_join_query)
    print("pulsepoint join query complete")

    # Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
    next_tables = add_location_info(engine=engine,
                                    target_schema='tmp',
                                    target_table='crashes_nbh_ward',
                                    from_schema='tmp',
                                    from_table='crash_pulsepoint_join',
                                    partition_by_field='objectid')
    print("neighborhood-ward query complete")
    next_tables = add_school_info(engine=engine,
                                  target_schema='tmp',
                                  target_table='crashes_schools',
                                  from_schema=next_tables[0],
                                  from_table=next_tables[1])
    print("schools query complete")
    next_tables = add_walkscore_info(engine=engine,
                                     target_schema='tmp',
                                     target_table='crashes_walkscore',
                                     from_schema=next_tables[0],
                                     from_table=next_tables[1])
    print("walkscore query complete")
    next_tables = add_roadway_info(engine=engine,
                                   target_schema='tmp',
                                   target_table='crashes_roadway_info',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1],
                                   partition_by_field='objectid',
                                   within_distance=0.001)
    print("roadway info query complete")
    next_tables = add_intersection_info(
        engine=engine,
        target_schema='tmp',
        target_table='crashes_intersection_info',
        from_schema=next_tables[0],
        from_table=next_tables[1],
        partition_by_field='objectid',
        within_distance=10)
    print("intersection info query complete")
    row_count = create_final_table(engine=engine,
                                   target_schema='analysis_data',
                                   target_table='dc_crashes_w_details',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1])
    print("final query complete with row count ", row_count)
示例#5
0
print("dupe check query complete")

engine.execute(crashes_join_query)
print("join to crashes query complete")

# Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
next_tables = add_location_info(engine=engine,
                                target_schema='tmp',
                                target_table='pulsepoint_nbh_ward',
                                from_schema='tmp',
                                from_table='pulsepoint_crash_join',
                                partition_by_field='Agency_Incident_ID')
print("neighborhood-ward query complete")
next_tables = add_school_info(engine=engine,
                              target_schema='tmp',
                              target_table='pulsepoint_schools',
                              from_schema=next_tables[0],
                              from_table=next_tables[1])
print("schools query complete")
next_tables = add_walkscore_info(engine=engine,
                                 target_schema='tmp',
                                 target_table='pulsepoint_walkscore',
                                 from_schema=next_tables[0],
                                 from_table=next_tables[1])
print("walkscore query complete")
next_tables = add_roadway_info(engine=engine,
                               target_schema='tmp',
                               target_table='pulsepoint_roadway_info',
                               from_schema=next_tables[0],
                               from_table=next_tables[1],
                               partition_by_field='Agency_Incident_ID',
    LEFT JOIN analysis_data.roadway_blocks b on ST_DWithin(a.geography, b.geography, 0.001)
	GROUP BY a.intersectionid, a.intersection_type, a.geography, a.street_names
) ;

CREATE INDEX tmp_intersection_points_road_types_index ON tmp.intersection_points_road_types USING GIST (geography);
"""

# First execute the table-specific queries
engine.execute(step1_query)
print("step1 query complete")

# Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
next_tables = add_location_info(engine=engine,
                                target_schema='tmp',
                                target_table='intersection_points_nbh_ward',
                                from_schema='tmp',
                                from_table='intersection_points_road_types',
                                partition_by_field='intersectionid')
print("neighborhood-ward query complete")
next_tables = add_school_info(engine=engine,
                              target_schema='tmp',
                              target_table='intersection_points_schools',
                              from_schema=next_tables[0],
                              from_table=next_tables[1])
print("schools query complete")
row_count = create_final_table(engine=engine,
                               target_schema='analysis_data',
                               target_table='intersection_points',
                               from_schema=next_tables[0],
                               from_table=next_tables[1])
print("final query complete with row count ", row_count)
    ,ST_SetSRID(ST_MakePoint(cast(xcoord as numeric), cast(ycoord as numeric)), 4326)::geography as geography
FROM source_data.metro_stations_daily_ridership 
WHERE station is not null
)
"""

# First execute the table-specific queries
engine.execute(step1_query)
print("step1 query complete")

# Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
next_tables = add_location_info(
    engine=engine,
    target_schema='tmp',
    target_table='metro_stations_ridership_nbh_ward',
    from_schema='tmp',
    from_table='metro_stations_ridership',
    partition_by_field='objectid')
print("neighborhood-ward query complete")
next_tables = add_school_info(engine=engine,
                              target_schema='tmp',
                              target_table='metro_stations_ridership_schools',
                              from_schema=next_tables[0],
                              from_table=next_tables[1])
print("schools query complete")
row_count = create_final_table(engine=engine,
                               target_schema='analysis_data',
                               target_table='metro_stations_ridership',
                               from_schema=next_tables[0],
                               from_table=next_tables[1])
print("final query complete with row count ", row_count)
def generate_moving_violations_table(AWS_Credentials: dict, **kwargs):

    # if no environment is specified default to dev
    env = kwargs.get('env', None)
    if env == None:
        env = 'DEV'
    env = env.upper()

    # set up RDS and S3 connections, engines, cursors
    region = AWS_Credentials['region']
    engine = create_postgres_engine(destination="AWS_PostGIS", env=env)

    # First move all source data records to a temp table
    step_1_query = """

    CREATE TABLE IF NOT EXISTS tmp.moving_violations_need_geo as 
    SELECT * FROM source_data.moving_violations
    WHERE geography IS NULL;

    CREATE TABLE IF NOT EXISTS tmp.moving_violations_has_geo as 
    SELECT * FROM source_data.moving_violations
    WHERE geography IS NOT NULL;

    CREATE INDEX IF NOT EXISTS mv_location_index ON tmp.moving_violations_need_geo (location);
    """

    engine.execute(step_1_query)
    print("temp table created")

    # geocode the locations
    records = [
        loc for (loc, ) in engine.execute(
            "select distinct location from tmp.moving_violations_need_geo where geography is null limit 2000"
        ).fetchall()
    ]
    print(len(records), " records passed to geocode function")
    geocode_text(engine=engine,
                 records_to_geocode=records,
                 administrative_area='District of Columbia',
                 text_type='Moving Violations location')

    # update lat and long values from new data
    step_2_query = """
    UPDATE tmp.moving_violations_need_geo
    SET geography = source_data.geocoded_text.point_geography
    FROM source_data.geocoded_text 
    WHERE source_data.geocoded_text.text = location
    ;

    INSERT INTO tmp.moving_violations_has_geo
    SELECT * FROM tmp.moving_violations_need_geo;

    CREATE INDEX IF NOT EXISTS mv_geom_idx ON tmp.moving_violations_has_geo USING GIST (geography);
    """

    engine.execute(step_2_query)
    print("geo values updated")

    # Then execute the same location-info queries (roadway, schools, neighborhoods) that apply to all analysis tables and create the final table
    next_tables = add_location_info(engine=engine,
                                    target_schema='tmp',
                                    target_table='moving_violations_nbh_ward',
                                    from_schema='tmp',
                                    from_table='moving_violations_has_geo',
                                    partition_by_field='objectid')
    print("neighborhood-ward query complete")
    next_tables = add_school_info(engine=engine,
                                  target_schema='tmp',
                                  target_table='moving_violations_schools',
                                  from_schema=next_tables[0],
                                  from_table=next_tables[1])
    print("schools query complete")
    next_tables = add_roadway_info(
        engine=engine,
        target_schema='tmp',
        target_table='moving_violations_roadway_info',
        from_schema=next_tables[0],
        from_table=next_tables[1],
        partition_by_field='objectid',
        within_distance=50)
    print("roadway info query complete")
    next_tables = add_intersection_info(
        engine=engine,
        target_schema='tmp',
        target_table='moving_violations_intersection_info',
        from_schema=next_tables[0],
        from_table=next_tables[1],
        partition_by_field='objectid',
        within_distance=20)
    print("intersection info query complete")
    row_count = create_final_table(engine=engine,
                                   target_schema='analysis_data',
                                   target_table='moving_violations',
                                   from_schema=next_tables[0],
                                   from_table=next_tables[1])
    print("final query complete with row count ", row_count)