def handle(self, *args, **options):
    drop_db('sample_data')
    project = Project.objects.all()[0]
    client_fixture = ConfigEntityFixture.resolve_config_entity_fixture(project)
    default_db_entities = client_fixture.default_db_entities
    for db_entity_config in default_db_entities:
        importer = ImportData(config_entity=project, db_entity=db_entity_config)
        importer.target_database = settings.DATABASES['sample_data']
        importer.create_target_db_string()
        # For now we only import data for DbEntity instances with a configured database url
        connection_dict = postgres_url_to_connection_dict(db_entity_config['url'])
        # The import database currently stores tables as
        # public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
        # We always use the table name without the word sample for the target table name
        source_table = "{0}_{1}_{2}".format(project.key, db_entity_config['table'], 'sample')
        importer._dump_tables_to_target(
            '-t %s' % source_table,
            source_schema='public',
            target_schema='public',
            source_table=source_table,
            target_table=source_table,
            connection_dict=connection_dict)
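# A minimal sketch of the pg_dump | psql pipeline that _dump_tables_to_target is
# assumed to wrap; the helper's real implementation is not shown in this section.
# dump_flags mirrors the '-t %s' argument above; connection_dict is assumed to
# carry the host/port/user/database keys that postgres_url_to_connection_dict
# produces, and password prompts are assumed to be suppressed via a pgpass file
# (see __init__ below).
import subprocess


def dump_table_to_target(dump_flags, connection_dict, target_database):
    # Dump only the selected table(s) from the source database
    dump = 'pg_dump -h {host} -p {port} -U {user} {flags} {database}'.format(
        flags=dump_flags, **connection_dict)
    # Replay the dump against the target database in one shell pipeline
    load = 'psql -h {HOST} -p {PORT} -U {USER} -d {NAME}'.format(**target_database)
    subprocess.check_call('{0} | {1}'.format(dump, load), shell=True)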
def __init__(self, **arguments):
    self.arguments = arguments
    self.dump_only = self.arguments.get('dump_only', None)
    self.region_key = self.arguments.get('schema', None)
    self.target_database = database_settings('default')
    # The config_entity whose feature tables should be imported
    self.config_entity = self.arguments.get('config_entity', None)
    if self.config_entity:
        logger.info("Importing DbEntity table into ConfigEntity {0}".format(self.config_entity.subclassed))
    # The optional db_entity_key whose Feature class table should be imported. Otherwise all DbEntity tables
    # are imported for the config_entity, including inherited ones from parent ConfigEntities
    self.db_entity_key = self.arguments.get('db_entity_key', None)
    self.db_entities = filter(
        lambda db_entity: not self.db_entity_key or (db_entity.key == self.db_entity_key),
        self.config_entity.owned_db_entities())
    self.test = self.arguments.get('test', None)
    # The psql connection to the target server, normally the django server
    self.create_target_db_string()
    self.command_execution = CommandExecution(logger)
    self.target_connection_dict = dict(
        user=self.target_database['USER'],
        password=self.target_database['PASSWORD'],
        host=self.target_database.get('HOST', 'localhost'),
        port=self.target_database.get('PORT', 5432),
        database=self.target_database['NAME'])
    # Used to get around password authentication
    self.connections = ["{host}:*:*:{user}:{password}".format(**dict(
        host=self.target_database['HOST'],
        user=self.target_database['USER'],
        password=self.target_database['PASSWORD']))]
    for db_entity in self.db_entities:
        # Create a password file in order to avoid dealing with stdin for passwords
        # This has been bypassed in favor of passing the password to stdin
        if not (db_entity.has_db_url or db_entity.has_file_url):
            raise Exception("This db_entity, {0}, has no database or file url".format(db_entity.key))
        if db_entity.has_db_url:
            # Set up the connection strings for the db_entity so that we can get around interactive password authentication
            # TODO This is never distinct per db_entity. We could just use self.target_connection_dict
            connection_dict = postgres_url_to_connection_dict(db_entity.url)
            self.connections.append("{host}:*:*:{user}:{password}".format(**connection_dict))
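# The self.connections entries above use the libpq password-file format,
# host:port:database:user:password. A minimal sketch, assuming the goal is to
# materialize them as a pgpass file so pg_dump/psql subprocesses skip interactive
# password prompts; write_pgpass_file is illustrative and not part of ImportData.
import os
import stat


def write_pgpass_file(connections, path=os.path.expanduser('~/.pgpass')):
    with open(path, 'w') as pgpass:
        pgpass.write('\n'.join(connections) + '\n')
    # libpq ignores a password file unless only the owner can read it
    os.chmod(path, stat.S_IRUSR | stat.S_IWUSR)
    # Point child processes at the file explicitly
    os.environ['PGPASSFILE'] = path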
def import_data(self, **kwargs):
    """
        Imports data from an external source to create the test data
        :return: a two item tuple containing the region that was imported and a list of the imported projects
    """
    # Calculate a sample lat/lon box of the config_entity
    config_entity = self.config_entity
    if self.test:
        bounds = chop_geom(config_entity.bounds, 0.90)
        logger.info(u"Creating subselection with extents: {0}. This will be used to crop any table that doesn't have a sample version".format(bounds))
    conn = psycopg2.connect(**pg_connection_parameters(settings.DATABASES['default']))
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = conn.cursor()
    for db_entity in self.db_entities:
        # This is the index on wkb_geometry.
        spatial_index_name = '{schema}_{key}_geom_idx'.format(schema=db_entity.schema, key=db_entity.key)
        table = db_entity.table
        if db_entity.has_file_url:
            # Remove any table of the same name from the import schema. This is unlikely since imported
            # tables have timestamps
            drop_table('"%s"."%s"' % (settings.IMPORT_SCHEMA, db_entity.key))
            sql_file_path = file_url_to_path(db_entity.url)
            db_entity.srid = db_entity.srid or '4326'
            logger.info("verifying SRID {0}".format(db_entity.srid))
            verify_srid(db_entity.srid)
            # Create the import schema if needed
            PGNamespace.objects.create_schema(settings.IMPORT_SCHEMA)
            # Import the table by running the SQL file (e.g. one generated by shp2pgsql) through psql
            import_sql_command = '/usr/bin/psql {0} -f {1}'.format(self.target_database_connection, sql_file_path)
            stdin = "{0}\n{1}".format(self.arguments.get('password', None), self.target_database.get('PASSWORD', None))
            results = self.command_execution.run(import_sql_command, stdin=stdin)
            if results.returncode:
                raise Exception(results.stderr.text)
            # We expect a table in the public schema with a name based on db_entity.key
            # Move the table from the public schema to the db_entity schema
            move_to_schema = "alter table {0}.{1} set schema {2};".format(settings.IMPORT_SCHEMA, db_entity.key, db_entity.schema)
            logger.info("Moving import file table to schema: %s" % move_to_schema)
            cursor.execute(move_to_schema)
            # Drop the constraint that enforces the srid of the wkb_geometry if one exists
            drop_constraint = '''alter table {0}.{1} drop constraint if exists enforce_srid_wkb_geometry'''.format(db_entity.schema, db_entity.key)
            logger.info("Dropping constraint on wkb_geometry: %s" % drop_constraint)
            cursor.execute(drop_constraint)
            # Note we're not creating an index on wkb_geometry here
            # because imported files already have an index created.
        elif db_entity.has_db_url:
            # The import database currently stores tables as
            # public.[config_entity.key]_[feature_class._meta.db_table (with schema removed)][_sample (for samples)]
            #
            # We always use the table name without the word sample for the target table name
            if settings.USE_SAMPLE_DATA_SETS or self.test:
                source_table = "{0}_{1}_{2}".format(
                    config_entity.import_key or config_entity.key, db_entity.table, 'sample')
            else:
                source_table = "{0}_{1}".format(config_entity.import_key or config_entity.key, db_entity.table)
            connection_dict = postgres_url_to_connection_dict(db_entity.url)
            self._dump_tables_to_target(
                '-t %s' % source_table,
                source_schema='public',
                target_schema=db_entity.schema,
                source_table=source_table,
                target_table=table,
                connection_dict=connection_dict)
            # Create a spatial index
            spatial_index = '''create index {index_name} on {schema}.{key} using GIST (wkb_geometry);'''.format(
                index_name=spatial_index_name, schema=db_entity.schema, key=db_entity.key)
            cursor.execute(spatial_index)
        # Whether the table comes from our server or an upload, we want to transform the SRID to 4326
        transform_to_4326 = 'ALTER TABLE {schema}.{table} ALTER COLUMN wkb_geometry ' \
                            'TYPE Geometry(geometry, 4326) ' \
                            'USING ST_Transform(ST_Force_2d(wkb_geometry), 4326);'.format
        logger.info("Transforming to 4326: %s" % transform_to_4326(schema=db_entity.schema, table=db_entity.table))
        cursor.execute(transform_to_4326(schema=db_entity.schema, table=db_entity.table))
        # Now cluster the data and vacuum so that future joins are faster:
        # * CLUSTER rewrites the data on disk so that rows that are spatially near each
        #   other are also near each other on disk
        # * VACUUM cleans up disk space, removing sparse holes on disk.
        # * ANALYZE regenerates statistics about wkb_geometry so that the query planner can make
        #   better decisions.
        logger.info('Clustering %s.%s to optimize spatial joins', db_entity.schema, table)
        cluster = 'CLUSTER {index_name} ON {target_schema}.{target_table};'.format(
            index_name=spatial_index_name, target_schema=db_entity.schema, target_table=table)
        cursor.execute(cluster)
        logger.info('Vacuuming and analyzing %s.%s.', db_entity.schema, table)
        analyze = 'VACUUM ANALYZE {target_schema}.{target_table};'.format(
            target_schema=db_entity.schema, target_table=table)
        cursor.execute(analyze)
        logger.info("Finished importing data for DbEntity table {0}.{1}".format(db_entity.schema, db_entity.key))
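# The file branch of import_data loads an already-generated SQL file with
# psql -f. When the upload is a raw shapefile, the conventional way to produce
# such a file is shp2pgsql; a hedged sketch follows (whether this project runs
# shp2pgsql or ogr2ogr upstream is not shown in this section, and the paths and
# names are illustrative). -g names the geometry column wkb_geometry to match
# the column the code above expects.
import subprocess


def shapefile_to_sql(shapefile_path, srid, schema, table, sql_file_path):
    # -s declares the SRID, -I adds a GiST index, -g names the geometry column
    command = 'shp2pgsql -s {srid} -I -g wkb_geometry {shp} {schema}.{table} > {sql}'.format(
        srid=srid, shp=shapefile_path, schema=schema, table=table, sql=sql_file_path)
    subprocess.check_call(command, shell=True)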
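# Taken together, the post-import statements above amount to the following
# per-table sequence. A standalone sketch with illustrative schema/table names
# (demo.parcels), runnable against any PostGIS database:
import psycopg2


def optimize_spatial_table(connection_parameters, schema='demo', table='parcels'):
    conn = psycopg2.connect(**connection_parameters)
    # VACUUM cannot run inside a transaction block, hence autocommit,
    # just as import_data configures its connection above
    conn.set_isolation_level(psycopg2.extensions.ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = conn.cursor()
    index_name = '{0}_{1}_geom_idx'.format(schema, table)
    for statement in [
            # GiST index so spatial predicates (ST_Intersects, ST_DWithin, ...)
            # can use an index scan instead of a sequential scan
            'CREATE INDEX {0} ON {1}.{2} USING GIST (wkb_geometry);'.format(index_name, schema, table),
            # Normalize every geometry to 2D lat/lon (EPSG:4326)
            'ALTER TABLE {0}.{1} ALTER COLUMN wkb_geometry '
            'TYPE Geometry(geometry, 4326) '
            'USING ST_Transform(ST_Force_2d(wkb_geometry), 4326);'.format(schema, table),
            # Rewrite rows in index order so spatially-near rows share disk pages
            'CLUSTER {0} ON {1}.{2};'.format(index_name, schema, table),
            # Reclaim dead space and refresh planner statistics
            'VACUUM ANALYZE {0}.{1};'.format(schema, table)]:
        cursor.execute(statement)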