Example #1
def get_connection(source):
    '''
    Get a DB connection from the CLI args, defaulting to postgres:///mydb
    '''
    source.engine = create_engine(source.db_name)
    ui.header('Connecting to database %s' % source.db_name)

    if not database_exists(source.engine.url):
        create_database(source.engine.url)
        ui.item("Creating database %s" % source.db_name)

    Session = sessionmaker()
    Session.configure(bind=source.engine)

    source.session = Session()

    # Check for PostGIS support
    gis_q = 'SELECT PostGIS_version();'
    try:
        source.session.execute(gis_q)
        source.geo = True
        source.session.commit()
    except (OperationalError, ProgrammingError):
        # The failed probe leaves the transaction aborted; roll it back so
        # the session stays usable.
        source.geo = False
        source.session.rollback()

    if source.geo:
        ui.item('PostGIS is installed. Geometries will be imported '
                'as PostGIS geoms.')
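
A minimal sketch of how this function might be driven. The SimpleNamespace stand-in and the connection string are illustrative assumptions; in the project the source objects come from its own source classes.

from types import SimpleNamespace

# Stand-in for a real source object; only db_name is needed up front.
source = SimpleNamespace(db_name='postgresql:///mydb')
get_connection(source)
# The call attaches .engine, .session and .geo to the source in place.
print(source.geo)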
Example #2
def get_binding(source):
    '''
    Translate the source's metadata into a SQLAlchemy binding

    This looks at each column type in the metadata and creates a
    SQLAlchemy binding with columns to match. For now it fails loudly if it
    encounters a column type we've yet to map to its SQLAlchemy type.
    '''

    record_fields = {
        '__tablename__': source.tbl_name,
        '_pk_': Column(Integer, primary_key=True)
    }

    ui.header('Setting up new table, "%s", from %s source fields' %
              (source.tbl_name, source.name))

    for col_name, col_type in source.metadata:

        if isinstance(col_type, Geometry) and not source.geo:
            try:
                source.session.execute("CREATE EXTENSION POSTGIS;")
                ui.item(
                    "Adding PostGIS extension to support %s column."
                    % col_name)
                source.session.commit()
                source.geo = True
            except (OperationalError, ProgrammingError):
                # Roll back the failed CREATE EXTENSION so the session
                # stays usable, then skip the geometry column.
                source.session.rollback()
                msg = (
                    '"%s" is a %s column but your database doesn\'t support '
                    'PostGIS so it\'ll be skipped.') % (col_name, col_type)
                ui.item(msg)
                continue

        if col_name.startswith(':@computed'):
            ui.item('Ignoring computed column "%s".' % col_name)
            continue

        try:
            assert col_type, (
                'Unable to map column "%s" to a SQL type.' % col_name)
            record_fields[col_name] = Column(col_type)
        except NotImplementedError as e:
            ui.item('%s' % str(e))

    source.binding = type('DataRecord', (declarative_base(),), record_fields)
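
The closing type() call builds the declarative model class at runtime. For a source with a single text column it is equivalent to the hand-written class below; the table and column names are illustrative, not from the project.

from sqlalchemy import Column, Integer, Text
from sqlalchemy.ext.declarative import declarative_base

Base = declarative_base()

class DataRecord(Base):
    __tablename__ = 'my_source_table'        # source.tbl_name
    _pk_ = Column(Integer, primary_key=True)
    address = Column(Text)                   # one mapped source column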
Example #3
def insert_source(source):
    '''
    Gets the connection and binding and inserts data.
    '''

    get_connection(source)

    if not isinstance(source, sc.CenPy):
        get_binding(source)

    if source.engine.dialect.has_table(source.engine, source.tbl_name):
        print()
        warnings.warn(("Destination table already exists. Current table " +
                       "will be dropped and replaced."))
        print()
        if not isinstance(source, sc.CenPy):
            source.binding.__table__.drop(source.engine)

    try:
        if not isinstance(source, sc.CenPy):
            source.binding.__table__.create(source.engine)
    except ProgrammingError as e:
        raise CLIError('Error creating destination table: %s' % str(e))

    circle_bar = FillingCirclesBar('  ▶ Loading from source',
                                   max=source.num_rows)

    source.insert(circle_bar)

    circle_bar.finish()

    ui.item('Committing rows (this can take a bit for large datasets).')
    source.session.commit()

    success = 'Successfully imported %s rows.' % source.num_rows
    ui.header(success, color='\033[92m')
    if source.name == "Socrata" and source.client:
        source.client.close()

    return
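
A hypothetical driver for insert_source(), assuming a CSV source built with the sc.Csv class that load_yaml() below maps to, and assuming db_name can be set directly on it:

source = sc.Csv('data/parcels.csv')     # path is illustrative
source.tbl_name = 'parcels'
source.db_name = 'postgresql:///mydb'   # consumed by get_connection()
insert_source(source)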
Example #4
def get_binding(dataset_metadata, geo, dest, source):
    """Translate the Socrata API metadata into a SQLAlchemy binding

    This looks at each column type in the Socrata API response and creates a
    SQLAlchemy binding with columns to match. For now it fails loudly if it
    encounters a column type we've yet to map to its SQLAlchemy type."""
    if dest:
        table_name = dest
    elif source == "Socrata":
        table_name = get_table_name(dataset_metadata['name'])

    record_fields = {
        '__tablename__': table_name,
        '_pk_': Column(Integer, primary_key=True)
    }

    ui.header(
        'Setting up new table, "%s", from %s source fields' % (table_name, source)
    )

    geo_types = ('location', 'point', 'multipolygon', 'esriFieldTypeGeometry')

    for col in dataset_metadata:
        if source == "Socrata":
            col_name = col['fieldName'].lower()
            col_type = col['dataTypeName']
        elif source == "HUD":
            col_name = col['name'].lower()
            col_type = col['type']

        if col_type in geo_types and not geo:
            msg = (
                '"%s" is a %s column but your database doesn\'t support '
                'PostGIS so it\'ll be skipped.'
            ) % (col_name, col_type)
            ui.item(msg)
            continue

        if col_name.startswith(':@computed'):
            ui.item('Ignoring computed column "%s".' % col_name)
            continue

        try:
            ui.item('%s: %s' % (col_name, col_type))
            record_fields[col_name] = get_sql_col(col_type, source)

        except NotImplementedError as e:
            ui.item('%s' % str(e))
            continue

    return type('SocrataRecord', (declarative_base(),), record_fields)
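
The returned binding is consumed the same way main() below uses it; a sketch of the hand-off, assuming engine, geo_enabled and dataset_metadata came from get_connection() and the Socrata client:

Binding = get_binding(dataset_metadata, geo_enabled, 'complaints', 'Socrata')
Binding.__table__.create(engine)   # materialise the table before inserting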
Example #5
def get_connection(db_str, dataset_metadata, source):
    """Get a DB connection from the CLI args and Socrata API metadata

    Uses the DB URL passed in by the user to generate a database connection.
    By default, returns a local SQLite database."""
    ui.header('Connecting to database')
    if db_str:
        engine = create_engine(db_str)
    else:
        default = default_db_str(source)
        engine = create_engine(default)
        ui.item('Using default SQLite database "%s".' % default)

    Session = sessionmaker()
    Session.configure(bind=engine)

    session = Session()

    # Check for PostGIS support
    gis_q = 'SELECT PostGIS_version();'
    try:
        session.execute(gis_q)
        geo_enabled = True
        session.commit()
    except (OperationalError, ProgrammingError):
        # Roll back the failed probe so the session stays usable.
        geo_enabled = False
        session.rollback()

    if geo_enabled:
        ui.item(
            'PostGIS is installed. Geometries will be imported '
            'as PostGIS geoms.'
        )
    else:
        ui.item('Query "%s" failed. Geometry columns will be skipped.' % gis_q)

    return engine, session, geo_enabled
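
A sketch of how the three return values thread into the rest of the pipeline; the URL is illustrative:

engine, session, geo_enabled = get_connection(
    'postgresql:///mydb', dataset_metadata, 'Socrata')
# geo_enabled decides whether get_binding() keeps or skips geometry columns.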
Example #6
def load_yaml():
    with open('bulk_load.yaml') as f:
        output = yaml.load(f, Loader=Loader)
    db_name = output['DATABASE']

    source_mapper = {
        'GEOJSONS': sc.GeoJson,
        'SHAPEFILES': sc.Shape,
        'CSVS': sc.Csv,
        'EXCELS': sc.Excel,
        'HUD_TABLES': sc.HudPortal
    }

    def parse_items(output_dict):
        try:
            for dataset in output[output_dict]:
                location, tbl_name = list(dataset.items())[0]
                source = source_mapper[output_dict](location)
                if tbl_name:
                    source.tbl_name = tbl_name
                insert_source(source)
        except Exception as e:
            ui.item('Skipping %s load due to error: "%s". Double check '
                    'formatting of bulk_load.yaml if this was '
                    'unintentional.' % (output_dict, e))
            print()

    for output_dict in source_mapper.keys():
        parse_items(output_dict)

    try:
        socrata_sites = output.get('SOCRATA').get('sites')
        app_token = output.get('SOCRATA').get('app_token')
        if socrata_sites:
            for site in socrata_sites:
                url = site['url']
                for dataset in site['datasets']:
                    dataset_id, tbl_name = list(dataset.items())[0]
                    source = sc.SocrataPortal(url, dataset_id, app_token,
                                              tbl_name)
                    insert_source(source)
    except Exception as e:
        ui.item('Skipping Socrata load due to error: "%s". Double check '
                'formatting of bulk_load.yaml if this was '
                'unintentional.' % e)
        print()

    try:
        place_type = output['CENSUS'].get('place_type')
        place_name = output['CENSUS'].get('place_name')
        level = output['CENSUS'].get('level')
        for dataset in output['CENSUS']['datasets']:
            if dataset.get('ACS'):
                product = 'ACS'
            elif dataset.get('DECENNIAL2010'):
                product = 'Decennial2010'
            year = dataset[product].get('year')
            tbl_name = dataset[product]['tbl_name']
            variables = dataset[product]['variables']
            source = sc.CenPy(product, year, place_type, place_name, level,
                              variables)
            insert_source(source)
    except Exception as e:
        ui.item('Skipping Census load due to error: "%s". Double check '
                'formatting of bulk_load.yaml if this was unintentional.' % e)
        print()
Example #7
def main():
    arguments = docopt(__doc__)

    site = arguments['<site>']

    if arguments['--HUD']:
        source = "HUD"
        dataset_id = site
        client = None
    elif arguments['--Socrata']:
        source = "Socrata"
        client = Socrata(site, arguments.get('-a'))

    try:
        if arguments.get('ls'):
            datasets = list_datasets(client, site)
            print(tabulate(datasets, headers='keys', tablefmt='psql'))
        elif arguments.get('insert'):
            if source == "Socrata":
                dataset_id = arguments['<dataset_id>']
                metadata = client.get_metadata(dataset_id)['columns']
            elif source == "HUD":
                metadata = json.loads(
                    urllib.request.urlopen(site).read())['fields']

            engine, session, geo = get_connection(
                arguments['-d'], metadata, source)
            
            if arguments['-t']:
                Binding = get_binding(
                    metadata, geo, arguments['-t'], source
                )
            else:
                Binding = get_binding(
                    metadata, geo, dataset_id, source
                )

            # Create the table
            try:
                Binding.__table__.create(engine)
            except ProgrammingError as e:
                # Catch these here because this is our first attempt to
                # actually use the DB
                if 'already exists' in str(e):
                    raise CLIError(
                        'Destination table already exists. Specify a new table'
                        ' name with -t.'
                    )
                raise CLIError('Error creating destination table: %s' % str(e))

            num_rows, data = get_data(source, dataset_id, client)
            bar = FillingCirclesBar('  ▶ Loading from source', max=num_rows)

            # Iterate the dataset and INSERT each page
            if source == "Socrata":
                for page in data:
                    insert_data(page, session, bar, Binding)

            if source == "HUD":
                insert_data(data, session, bar, Binding)

            bar.finish()

            ui.item(
                'Committing rows (this can take a bit for large datasets).'
            )
            session.commit()

            success = 'Successfully imported %s rows.' % num_rows
            ui.header(success, color='\033[92m')
        if client:
            client.close()
    except CLIError as e:
        ui.header(str(e), color='\033[91m')
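
Only fragments of the docopt usage string appear above, so the full argument shape is an assumption; a dict in this spirit is what main() consumes for an insert run:

arguments = {
    '<site>': 'data.cityofnewyork.us',
    '<dataset_id>': 'erm2-nwe9',        # illustrative Socrata dataset ID
    '--Socrata': True,
    '--HUD': False,
    'ls': False,
    'insert': True,
    '-a': None,                         # Socrata app token
    '-d': 'postgresql:///mydb',         # destination DB URL
    '-t': 'complaints',                 # destination table name
}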