Example #1
def _create_census_table_from_csv(raw_csv_path, target_table):
    """Create a census table using the columns from the provided csv."""
    print('Creating table {}.'.format(target_table))
    with open(raw_csv_path, 'r', newline='') as csv_file:
        reader = csv.reader(csv_file)
        columns = next(reader)

    columns = [
        _clean_column_name(col) for col in columns if col not in UNUSED_COLUMNS
    ]
    datatypes = [
        'VARCHAR' if col == 'census_tract' else 'DECIMAL' for col in columns
    ]
    columns_with_types = [
        '{} {}'.format(col, dtype) for col, dtype in zip(columns, datatypes)
    ]
    if 'census_tract' in columns:
        primary_key_index = columns.index('census_tract')
        columns_with_types[primary_key_index] += ' PRIMARY KEY'

    create_table_query = """
        CREATE TABLE {target_table} (
            {columns_with_types}
        )
        ;
    """.format(target_table=target_table,
               columns_with_types=', '.join(columns_with_types))
    engine = connect.create_db_engine()
    engine.execute(create_table_query)
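For illustration, a minimal call sketch; the CSV path and table name below are hypothetical, and the function assumes the module-level csv import plus the _clean_column_name and UNUSED_COLUMNS helpers referenced above:

# Hypothetical path and table name; the table schema mirrors the CSV header.
_create_census_table_from_csv(
    raw_csv_path='data/census/acs_2016.csv',
    target_table='census_acs',
)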
Example #2
def minimal_fetch_representative_points(service_area_ids,
                                        engine=connect.create_db_engine()):
    """
    Fetch representative points for a list of service areas.

    No transformations are applied to the points. In particular, census data is not added and the
    format does not necessarily match the frontend's expectations.
    """
    if not service_area_ids:
        return []

    query_params = {'id_list': tuple(service_area_ids)}

    select_query = """
        SELECT {cols}
        FROM {table_name}
        WHERE service_area_id IN %(id_list)s
        ORDER BY id
        ;
    """.format(
        cols=', '.join(MINIMAL_RP_COLUMNS),
        table_name=representative_point.RepresentativePoint.__tablename__,
    )

    return [
        dict(row)
        for row in engine.execute(select_query, query_params).fetchall()
    ]
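A usage sketch with a hypothetical service area ID; each row comes back as a plain dict keyed by MINIMAL_RP_COLUMNS, ordered by id:

# Hypothetical ID; real values live in the service_areas table.
points = minimal_fetch_representative_points(
    service_area_ids=['ca_los_angeles_county_90001'],
)
print(len(points))  # one dict per representative point, ordered by id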
Example #3
def fetch_representative_points(service_area_ids,
                                include_census_data,
                                engine=connect.create_db_engine()):
    """
    Fetch representative points for a list of service areas.

    Prepares responses for use by the frontend.
    """
    if not service_area_ids:
        return []

    query_params = {'id_list': tuple(service_area_ids)}

    # Set census mapping.
    census_mapping = CENSUS_FIELDS_BY_CATEGORY if include_census_data else {}

    if include_census_data:
        join_list = ' '.join([
            """
            LEFT JOIN {table}
            ON (representative_points.census_tract = {table}.census_tract)
        """.format(table=table) for table in CENSUS_TABLES
        ])

        select_query = """
            SELECT {cols}
            FROM {table_name}
            {joins}
            WHERE service_area_id IN %(id_list)s
            ORDER BY id
            ;
        """.format(
            cols=', '.join(RP_COLUMNS + readable_columns_from_census_mapping(
                CENSUS_FIELDS_BY_CATEGORY)),
            table_name=representative_point.RepresentativePoint.__tablename__,
            joins=join_list,
        )
        logger.info('Fetching representative_points with census data.')
    else:
        select_query = """
            SELECT {cols}
            FROM {table_name}
            WHERE service_area_id IN %(id_list)s
            ORDER BY id
            ;
        """.format(
            cols=', '.join(RP_COLUMNS),
            table_name=representative_point.RepresentativePoint.__tablename__)

    return [
        representative_point.row_to_dict(row, census_mapping=census_mapping)
        for row in engine.execute(select_query, query_params).fetchall()
    ]
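Note the %(id_list)s binding above: psycopg2 expands a bound Python tuple into a parenthesized SQL list, so the IN clause stays parameterized rather than string-formatted. A standalone sketch of the same pattern, assuming the same engine helper:

# psycopg2 renders the tuple as ('ca_...',), keeping the IN clause parameterized.
engine = connect.create_db_engine()
rows = engine.execute(
    'SELECT id FROM representative_points WHERE service_area_id IN %(id_list)s;',
    {'id_list': ('ca_los_angeles_county_90001',)},
).fetchall()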
Example #4
def calculate_measurement_matrix(
        service_area_ids,
        locations,
        measurer_name,
        engine=connect.create_db_engine(),
):
    """
    Calculate a measurement matrix for the given service area IDs.

    The measurement between point i and location j is stored in the cell at row i, column j.
    """
    # TODO: Share introduction of this function with calculate.adequacy.
    location_to_id_map = collections.defaultdict(list)
    for j, location in enumerate(locations):
        # TODO - Permanently fix this on the frontend side.
        location.pop('id')
        location_to_id_map[Point(**location)].append(j)

    locations = list(location_to_id_map.keys())
    points = representative_points.minimal_fetch_representative_points(
        service_area_ids=service_area_ids, engine=engine)

    logger.debug('{} pairwise distances to calculate.'.format(
        len(locations) * len(points)))

    measurer = get_measurer(measurer_name)
    measurer_config = config.get('measurer_config')[measurer_name]
    executor_type = measurer_config['adequacy_executor_type']
    n_processors = measurer_config['n_adequacy_processors']

    logger.debug('Starting {} executors for gravity calculations...'.format(
        n_processors))
    with executor_type(processes=n_processors) as executor:
        measurements_by_point = executor.starmap(
            func=_measure_one_to_many,
            iterable=zip(
                points,
                itertools.repeat(locations),
                itertools.repeat(measurer),
            ))

    measurements_by_point = _add_provider_ids(
        measurements_by_point=measurements_by_point,
        location_to_id_map=location_to_id_map)

    measurement_matrix = np.full(shape=(len(points), len(locations)),
                                 fill_value=float('inf'))
    for i, response in enumerate(measurements_by_point):
        for j, distance in zip(response['location_ids'],
                               response['measurements']):
            measurement_matrix[i][j] = distance

    return measurement_matrix
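A call sketch; the measurer name is hypothetical, and each location dict carries the 'id' key that the function pops before deduplicating points:

# Hypothetical inputs; 'haversine' stands in for a configured measurer name.
matrix = calculate_measurement_matrix(
    service_area_ids=['ca_los_angeles_county_90001'],
    locations=[{'id': 1, 'latitude': 34.05, 'longitude': -118.24}],
    measurer_name='haversine',
)
print(matrix.shape)  # (n_points, n_locations); unmeasured cells stay at inf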
Example #5
def _execute_outside_of_transaction_block(query):
    """
    Execute a SQL statement outside of a transaction block.

    Bypasses the transaction start enforced by the Python DB-API.
    """
    engine = connect.create_db_engine()
    connection = engine.raw_connection()
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    with connection.cursor() as cur:
        cur.execute(query)
    connection.close()
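Statements such as VACUUM or CREATE DATABASE refuse to run inside a transaction block in Postgres, which is exactly what this helper works around; a minimal sketch:

# VACUUM cannot run inside a transaction, so route it through the
# autocommit helper above.
_execute_outside_of_transaction_block('VACUUM ANALYZE representative_points;')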
Example #6
def check_sample_points_exist():
    """
    Retrieve sample points for a service area from the sample data; if none
    exist, exit with a nonzero code to indicate that initial sample data
    population should proceed.
    """
    engine = connect.create_db_engine(echo=True)
    # A service area from the base sample data.
    service_areas = ['ca_los_angeles_county_00000']
    results = representative_points.fetch_representative_points(
        service_areas, include_census_data=False, engine=engine)
    if len(results) == 0:
        exit(1)  # Exit nonzero to indicate that no records exist.
Example #7
def fetch_all_service_areas(engine=connect.create_db_engine()):
    """
    Fetch all available service areas from the database.

    Returns a dictionary containing service_area_id, county, and zip_code.
    """
    session = sessionmaker(bind=engine)()
    return session.query(
        service_area.ServiceArea.service_area_id,
        service_area.ServiceArea.county, service_area.ServiceArea.zip_code,
        service_area.ServiceArea.state,
        service_area.ServiceArea.nchs_urban_rural_code).order_by(
            service_area.ServiceArea.service_area_id).all()
Example #8
def initialize_postgres():
    """Initialize Postgres tables."""
    postgres_engine = connect.create_db_engine(echo=True)

    # Create DB if necessary...
    if not database_exists(postgres_engine.url):
        create_database(postgres_engine.url)

    # Install Postgis.
    install_postgis.install()

    # Create tables.
    Base.metadata.create_all(postgres_engine, checkfirst=True)
Example #9
def _insert_service_areas(json_features):
    """Insert service areas into the database from a GeoJSON file."""
    print('Inserting service areas...')
    data = _get_all_service_areas(json_features)
    try:
        methods.core_insert(
            engine=connect.create_db_engine(),
            sql_class=service_area.ServiceArea,
            data=data,
            return_insert_ids=False,
        )
    except Exception as e:
        print('Error inserting service areas: {}'.format(str(e)[:1000]))
Example #10
def _insert_representative_population_points(json_features):
    """Insert representative points into the database from a GeoJSON file."""
    print('Inserting representative points...')
    data = [_transform_single_point(point) for point in json_features]
    try:
        methods.core_insert(
            engine=connect.create_db_engine(),
            sql_class=representative_point.RepresentativePoint,
            data=data,
            return_insert_ids=False,
        )
        return data
    except Exception as e:
        print('Error inserting representative points: {}'.format(
            str(e)[:1000]))
Example #11
def _upload_csv(csv_path, target_table, sep=','):
    """Upload a (headerless) csv to the target table via a COPY command."""
    print('Uploading {} to table {}.'.format(csv_path, target_table))
    engine = connect.create_db_engine()
    conn = engine.raw_connection()
    cur = conn.cursor()
    with open(csv_path, 'r') as csv_file:
        cur.copy_expert(sql="""
                COPY {} FROM STDIN
                WITH CSV
                DELIMITER AS ','
            """.format(target_table),
                        file=csv_file)
    conn.commit()
    cur.close()
    conn.close()
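A call sketch; the path is hypothetical, and the CSV must be headerless with its columns in the same order as the target table, since COPY maps them positionally:

# Hypothetical path; COPY maps CSV columns positionally onto the table.
_upload_csv(csv_path='/tmp/census_acs_rows.csv', target_table='census_acs')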
Example #12
def test_core_insert():
    engine = connect.create_db_engine()
    address_data = [{
        'address': "Aaaat Brian's House",
        'latitude': 23,
        'longitude': 35,
        'location': postgis.to_point(35, 22)
    }]

    address_inserted_primary_key = methods.core_insert(
        engine,
        sql_class=address.Address,
        data=address_data,
        return_insert_ids=True,
        unique_column='address')

    provider_data = [{
        'address_id': address_inserted_primary_key[0],
        'languages': ['english', 'spanish'],
        'npi': 'aaa_npi_hello',
        'specialty': 'doctor_for_teddies'
    }]

    provider_inserted_primary_key = methods.core_insert(
        engine,
        sql_class=provider.Provider,
        data=provider_data,
        return_insert_ids=True)

    methods.delete(engine=engine,
                   sql_class=provider.Provider,
                   ids=provider_inserted_primary_key)

    methods.delete(engine=engine,
                   sql_class=address.Address,
                   ids=address_inserted_primary_key)
Example #13
"""Test adequacy requests for Time-Distance API."""
import json

from backend.app.exceptions.format import InvalidFormat
from backend.app.requests import adequacy
from backend.lib.database.postgres import connect

import flask

from flask_testing import LiveServerTestCase

import mock

import pytest

engine = connect.create_db_engine()


class TestAdequacyRequest(LiveServerTestCase):
    """Test class for adequacy request file."""
    def create_app(self):
        """Start a new flask app for testing."""
        app = flask.Flask(__name__)
        app.config['TESTING'] = True
        return app

    def test_adequacy_request_invalid_json(self):
        """Test adequacy requests in a simple case."""
        def _mock_get_json(force=True):
            raise json.JSONDecodeError(msg='', doc='', pos=0)
Example #14
def geocode_providers(
    provider_addresses,
    geocoder_name=GEOCODER,
    engine=connect.create_db_engine()
):
    """Fetch providers locations from list of provider addresses."""
    if not provider_addresses:
        return []

    session = sessionmaker(bind=engine)()

    provider_responses = []

    addresses = set(provider_addresses)
    logger.debug('Searching {} addresses for {} providers.'.format(
        len(addresses), len(provider_addresses))
    )

    if config.get('use_address_cache'):
        existing_addresses = {
            result.address: {
                'latitude': result.latitude,
                'longitude': result.longitude
            } for result in _fetch_addresses_from_db(addresses, session)
        }
        logger.debug('Found {} addresses in DB out of {}.'.format(
            len(existing_addresses), len(addresses))
        )
    else:
        logger.debug('Address database deactivated.')
        existing_addresses = {}

    addresses_to_geocode = addresses.difference(existing_addresses)
    if len(addresses_to_geocode) > 0 and GEOCODING:
        logger.debug('{} addresses to geocode.'.format(len(addresses_to_geocode)))
        logger.debug('Geocoding...')
        geocoded_addresses = _geocode_addresses(
            addresses=addresses_to_geocode,
            geocoder_name=geocoder_name,
            engine=engine,
            add_to_db=config.get('use_address_cache'))
        logger.debug('{} addresses geocoded.'.format(len(geocoded_addresses)))
        if geocoded_addresses:
            existing_addresses.update({
                result.address: {
                    'latitude': result.latitude,
                    'longitude': result.longitude
                } for result in _fetch_addresses_from_db(addresses_to_geocode, session)
            })
    elif len(addresses_to_geocode) == 0:
        logger.debug('No addresses to geocode.')
    elif not GEOCODING:
        logger.debug('Warning - Geocoding is not active. Proceeding without the missing addresses.')

    for i, raw_address in enumerate(provider_addresses):
        if i % 10000 == 0:
            logger.debug('Processed {} out of {}...'.format(i, len(provider_addresses)))

        # TODO - Fuzzy matching.
        if raw_address in existing_addresses:
            geocoded_address = existing_addresses[raw_address]
            provider_responses.append(
                _format_provider_response(
                    geocoded_address=geocoded_address
                )
            )
        else:
            provider_responses.append(
                _format_provider_response(geocoded_address=None)
            )

    logger.debug('Processing done for {} providers.'.format(len(provider_addresses)))

    session.close()
    return provider_responses
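A call sketch with a hypothetical address; responses align index-for-index with the input list, with a geocoded_address=None entry for anything unresolved:

# Hypothetical input; output order matches input order exactly.
responses = geocode_providers(
    provider_addresses=['123 Main St, Los Angeles, CA 90001'],
)
print(responses[0])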
Example #15
def _drop_table_if_exists(table_name):
    """Drop the provided table if it already exists."""
    print('Dropping {} (if such a table exists).'.format(table_name))
    engine = connect.create_db_engine()
    engine.execute('DROP TABLE IF EXISTS {};'.format(table_name))
Example #16
def _get_locations_to_check_by_service_area(service_area_ids,
                                            locations,
                                            radius_in_meters,
                                            engine=connect.create_db_engine()):
    """
    Find locations near each service area.

    This method reduces the number of distance calculations required by `calculate_adequacies`.

    Returns a mapping service_area_id --> list of relevant locations.
    """
    locations_to_check_by_service_area = collections.defaultdict(list)

    # FIXME: Use psycopg2.extras.execute_values to insert these values.
    address_values = [
        '({idx}, ST_SetSRID(ST_Point({longitude}, {latitude}), 4326)::geography)'
        .format(idx=idx,
                longitude=float(provider_address.longitude),
                latitude=float(provider_address.latitude))
        for idx, provider_address in enumerate(locations)
    ]

    query_params = {
        'service_area_id_list': tuple(service_area_ids),
    }

    temp_table_name = table_handling.get_random_table_name(prefix='addr')
    create_temp_table_query = """
        DROP TABLE IF EXISTS {temp_table_name};
        CREATE TEMP TABLE {temp_table_name} AS
        SELECT * FROM (
            VALUES {address_values_list}
        ) AS t (idx, location);
        CREATE INDEX tmp_{temp_table_name}_gix ON {temp_table_name} USING GIST (location)
        ;
    """.format(temp_table_name=temp_table_name,
               address_values_list=', '.join(address_values))
    gis_query = """
        SELECT
            areas.service_area_id AS service_area_id
            , tmp.idx AS address_idx
        FROM {service_areas} areas
        JOIN {temp_table_name} tmp
            ON (
            ST_DWithin(areas.location, tmp.location, {radius}, FALSE)
        )
        WHERE 1=1
            AND areas.service_area_id IN %(service_area_id_list)s
        ;
    """.format(
        temp_table_name=temp_table_name,
        service_areas=service_area.ServiceArea.__tablename__,
        radius=radius_in_meters,
    )

    full_query = """
        {create_temp_table_query}
        {gis_query}
    """.format(create_temp_table_query=create_temp_table_query,
               gis_query=gis_query)
    query_results = (
        dict(row)
        for row in engine.execute(full_query, query_params).fetchall())

    for row in query_results:
        locations_to_check_by_service_area[row['service_area_id']].append(
            locations[row['address_idx']])

    for service_area_id in service_area_ids:
        if service_area_id not in locations_to_check_by_service_area:
            locations_to_check_by_service_area[service_area_id] = locations

    return locations_to_check_by_service_area
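A consumption sketch; the namedtuple below is a hypothetical stand-in, since the helper only reads .latitude and .longitude from each location. Service areas with no nearby candidate fall back to the full location list, as the final loop above guarantees:

import collections

# Hypothetical point-like stand-in; only .latitude/.longitude are read.
Location = collections.namedtuple('Location', ['latitude', 'longitude'])

nearby = _get_locations_to_check_by_service_area(
    service_area_ids=['ca_los_angeles_county_90001'],
    locations=[Location(latitude=34.05, longitude=-118.24)],
    radius_in_meters=30000,
)
for area_id, candidates in nearby.items():
    print(area_id, len(candidates))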
Example #17
def test_engine_creation():
    """ Test that we can create a connection without any error."""
    db = connect.create_db_engine()
    assert db is not None
Example #18
def test_engine_creation_retry_fail():
    """Test that engines retries to connect by providing a fake URL."""
    with pytest.raises(OperationalError):
        db_engine = connect.create_db_engine(
            db_url='postgresql://*****:*****@fake_url:5432/postgres')
        db_engine.connect()