def get_all(db_url=None, connection_func=None):
    """
    Utility function, returning association dictionaries for all table
    relations.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArguments  --  raised if neither connection or db_url
      parameter is specified.

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # prepare statement, to get association metadata
    select_statement = (
        'SELECT \n'
        '   t_par.name AS parent \n'
        '  ,a.parent_table_col AS parent_column \n'
        '  ,t.name AS table \n'
        '  ,a.table_col AS column \n'
        '  ,at.type_name AS type \n'
        'FROM {schema}.{association_table} a \n'
        '  INNER JOIN {schema}.{association_type_table} at \n'
        '    ON a.table_relation_type_id = at.table_relation_type_id \n'
        '  INNER JOIN {schema}.{table_table} t \n'
        '    ON a.table_id = t.table_id \n'
        '  INNER JOIN {schema}.{table_table} t_par \n'
        '    ON a.parent_table_id = t_par.table_id \n').format(
            schema=dto_util.SCHEMA, association_table=TABLE,
            table_table=table.TABLE, association_type_table=association_type.TABLE)
    # try/finally (with no 'except') guarantees the connection is released
    # while letting any execute/validate exception propagate unchanged.
    try:
        result = connection.execute(select_statement)
        associations = []
        for row in result:
            # dict(row) matches the sibling get_all() implementations
            # (the previous dict(zip(row.keys(), row.values())) was equivalent)
            table_association = dict(row)
            validate(table_association)
            associations.append(table_association)
        return associations
    finally:
        connection.close()
def get_all( db_url=None, connection_func=None):
    """
    Retrieve the current list of authorizations from the db.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArgument  --  raised if neither connection or db_url
      parameter is specified.
    ValidateException  --  raised when a problem is encountered validating
      a dto

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # prepare statement, to get table metadata
    select_statement = (
        'SELECT \n'
        '   a.user_id \n'
        '  ,p.name AS "project" \n'
        '  ,t.name AS "table" \n'
        'FROM {auth_schema}.{authorization_table} a \n'
        '  INNER JOIN {support_schema}.{table_table} t \n'
        '    ON a.table_id = t.table_id \n'
        '  INNER JOIN {support_schema}.{project_table} p \n'
        '    ON t.project_id = p.project_id \n'
    ).format(auth_schema=SCHEMA, authorization_table=TABLE,
             support_schema=dto_util.SCHEMA,
             project_table=project.TABLE,
             table_table=table.TABLE)
    # try/finally guarantees connection cleanup; exceptions propagate as-is
    # (a bare 'except: raise' clause would add nothing).
    try:
        result = connection.execute(select_statement)
        authorizations = []
        for row in result:
            authorization = dict(row) #make a real dict, so we can pprint() etc.
            validate(authorization)
            authorizations.append(authorization)
        return authorizations
    finally:
        connection.close()
def get_all(db_url=None, connection_func=None):
    """
    Retrieve the current list of projects from the warehouse support schema.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArgument  --  raised if neither connection or db_url
      parameter is specified.
    ValidateException  --  raised when a problem is encountered validating
      a dto

    >>> get_all()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # prepare statement, to get project metadata
    select_statement = (
        'SELECT \n'
        '   p.name \n'
        '  ,p.title \n'
        '  ,p.inport_data_set_id AS inport_id \n'
        '  ,p.csw_uuid AS uuid \n'
        'FROM {schema}.{table} p \n').format(
            schema=dto_util.SCHEMA, table=TABLE)
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        result = connection.execute(select_statement)
        projects = []
        for row in result:
            # local renamed from 'project' to avoid shadowing the
            # module-level 'project' import used by sibling functions
            project_dto = dict(row)
            validate(project_dto)
            projects.append(project_dto)
        return projects
    finally:
        connection.close()
def get(table_name=None, db_url=None, connection_func=None):
    """
    Retrieve the current list of variables from the warehouse support
    schema. (Optionally filtered to just 1x table)

    Keyword Parameters:
    table_name  --  String, representing the name of a Warehoused table
      (Optional; when omitted, variables for all tables are returned)
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArgument  --  raised if neither connection or db_url
      parameter is specified.
    ValidateException  --  raised when a problem is encountered validating
      a dto

    >>> get()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # prepare statement, to get variable metadata
    # NOTE: the CASE expression makes the %(name)s bind optional -- a NULL
    # bind returns every variable, a non-NULL bind filters to one table.
    select_statement = (
        'SELECT \n'
        '   v.column_name AS column \n'
        '  ,v.title \n'
        '  ,v.description \n'
        '  ,t.name AS table\n'
        '  ,pt.constructor_name AS python_type \n'
        '  ,v.column_type AS physical_type \n'
        '  ,v.units \n'
        '  ,v.max_length \n'
        '  ,v.precision \n'
        '  ,v.allowed_values \n'
        'FROM {schema}.{variable_table} v \n'
        '  INNER JOIN {schema}.{python_type_table} pt \n'
        '    ON v.variable_python_type_id = pt.variable_python_type_id \n'
        '  INNER JOIN {schema}.{table_table} t \n'
        '    ON v.table_id = t.table_id \n'
        'WHERE \n'
        '  (CASE WHEN %(name)s IS NOT NULL AND t.name = %(name)s \n'
        '     THEN 1 --if name specified, return row only if name matches \n'
        '   WHEN %(name)s IS NULL \n'
        '     THEN 1 --if no name specified,return all variables(i.e.: 1=1)\n'
        '   ELSE 0 --dont return row if name specified but doesnt match row\n'
        '   END) = 1 \n').format(schema=dto_util.SCHEMA, variable_table=TABLE,
                                 table_table=table.TABLE,
                                 python_type_table=variable_python_type.TABLE)
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        result = connection.execute(select_statement, name=table_name)
        variables = []
        for row in result:
            variable = dict(row)
            validate(variable)
            variables.append(variable)
        return variables
    finally:
        connection.close()
def get_by_lookup(table_names, db_url=None, connection_func=None):
    """
    Utility function, returning variable dictionaries associated with named
    tables.

    Keyword Parameters:
    table_names  --  A collection of table names, for which the table
      variables are to be retrieved.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArguments  --  raised if neither connection or db_url
      parameter is specified.

    >>> any_list = ['any_thing']
    >>> get_by_lookup( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    #FIXME: connection should really just be an Engine
    discover_connection = dto_util.get_connection(db_url, connection_func)
    # retrieve fields -- columns of the named table that are NOT part of any
    # FOREIGN KEY relationship (neither the referencing nor referenced side)
    select_statement = (
        'SELECT \n'
        '   DISTINCT c.table_name as table \n'
        '  ,c.column_name as column \n'
        'FROM \n'
        '  information_schema.tables t \n'
        '  INNER JOIN information_schema.columns c \n'
        '    ON t.table_schema = c.table_schema \n'
        '    AND t.table_name = c.TABLE_NAME \n'
        '  INNER JOIN pg_catalog.pg_attribute a --Identify fields used in Foreign key lookups \n'
        '''    ON a.attrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '    AND a.attname = c.column_name \n'
        '  left outer join pg_catalog.pg_constraint con -- match table attribute # to any lookup \n'
        '''    ON con.conrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '''    AND con.contype = 'f' -- only FOREIGN key constraints \n'''
        '    AND a.attnum = ANY(con.conkey) \n'
        '  left outer join pg_catalog.pg_constraint con_f -- match table attribute # to any lookup \n'
        '''    ON con_f.confrelid = (t.table_schema||'."'||t.table_name||'"')::regclass \n'''
        '''    AND con_f.contype = 'f' -- only FOREIGN key constraints \n'''
        '    AND a.attnum = ANY(con_f.confkey) \n'
        'WHERE \n'
        '  t.table_name = %s \n'
        '  AND con.conkey IS Null --a result with no constraint Keys arent used in a fk lookup \n'
        '  AND con_f.confkey IS Null --a result with no constraint Keys isnt used by a fk lookup \n'
    )
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        # build list of variable dicts
        variables = []
        for name in table_names:
            result = discover_connection.execute(select_statement, name)
            for row in result:
                # locals renamed from 'table'/'column' to avoid shadowing
                # the module-level 'table' import used by sibling functions
                table_name, column_name = row['table'], row['column']
                try:
                    python_type = variable_python_type.get_by_lookup(
                        table_name, column_name, db_url, connection_func)
                except variable_python_type.LookupNullType as e:
                    logging.info(e, exc_info=True)
                    continue #skip this table field
                variable = {
                    'table': table_name,
                    'column': column_name,
                    'title': None,
                    'description': None,
                    'python_type': python_type,
                    'physical_type': None,
                    'units': None,
                    'max_length': None,
                    'precision': None,
                    'allowed_values': None
                }
                validate(variable)
                variables.append(variable)
        return variables
    finally:
        discover_connection.close()
def lookup_tables(table_names, table_type='fact', lookup_type='dimension',
                  db_url=None, connection_func=None):
    """
    Utility function, returning table dictionaries associated with named
    tables.

    Keyword Parameters:
    table_names  --  A collection of Strings representing tables for which
      lists of associated tables are to be retrieved.
    table_type  --  String table type of the named tables (Default: 'fact').
      NOTE(review): currently accepted but unused by the implementation;
      retained for interface compatibility.
    lookup_type  --  String 'type' value assigned to each returned,
      referenced table dict (Default: 'dimension')
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArguments  --  raised if neither connection or db_url
      parameter is specified.

    >>> any_list = ['any_thing']
    >>> lookup_tables( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # select table info -- every FOREIGN KEY constraint on the named table,
    # joined back to catalog metadata for the referenced (dimension) table
    select_statement = """
      SELECT
         t_base.table_name as "table" --fact table name
        --,c.conkey
        ,a_base.attname as "table_field" --fact table field containing keys to be looked up
        ,t_ref.table_schema as "ref_table_schema" --schema of referenced dimension table
        ,t_ref.table_name as "ref_table" --referenced dimension table name
        --,c.confkey
        ,a_ref.attname as "ref_table_field" --dimension column containing the keys
        ,pg_catalog.pg_get_constraintdef(c.oid, true) as condef --pretty constraint text
      FROM pg_catalog.pg_constraint c
        inner join information_schema.tables t_base
          on c.conrelid = (t_base.table_schema||'."'||t_base.table_name||'"')::regclass
        inner join pg_attribute a_base
          on c.conrelid = a_base.attrelid
          AND a_base.attnum = ANY(c.conkey)
        inner join information_schema.tables t_ref
          on c.confrelid = (t_ref.table_schema||'."'||t_ref.table_name||'"')::regclass
        inner join pg_attribute a_ref
          on c.confrelid = a_ref.attrelid
          AND a_ref.attnum = ANY(c.confkey)
      WHERE c.contype = 'f' --Get only FOREIGN key constraints
        and t_base.table_name = %s
    """
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        # build list of table dicts
        tables = []
        for name in table_names:
            result = connection.execute(select_statement, name)
            ref_table_encountered = set() #track each referenced table we add
            for row in result:
                ref_table = row['ref_table']
                if ref_table not in ref_table_encountered:
                    new_table = {
                        'name': ref_table,
                        'type': lookup_type,
                        'updated': None,
                        'rows': None,
                        'years': None,
                        'project': None,
                        'contact': None
                    }
                    table.validate(new_table)
                    tables.append(new_table)
                    ref_table_encountered.add(ref_table) #only build 1x dict ea
        # check for Dimensional aliases (Roles)
        table_associations = lookup_associations(
            table_names, db_url, connection_func=connection_func,
            lookup_roles=False)
        roles_tuple = dto_util.get_roles(table_associations)
        role_tables, replacement_associations, role_associations = roles_tuple
        if replacement_associations:
            # include Dimension"roles" as tables,upon detection of"role" assoc.
            tables.extend(role_tables)
        return tables
    finally:
        connection.close()
def lookup_associations(table_names, db_url=None, connection_func=None,
                        default_type='fact dimension', lookup_roles=True):
    """
    Utility function, returning association dictionaries associated with
    named tables.

    Keyword Parameters:
    table_names  --  A collection of table names, for which the table
      associations are to be retrieved.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)
    default_type  --  String representing the association_type to be used
      for items found to be associated with one of the input tables
    lookup_roles  --  Boolean flag, indicating if the detected associations
      should be inspected for Dimensional aliases (Default: True)

    Exceptions:
    ConnectionMissingArguments  --  raised if neither connection or db_url
      parameter is specified.

    >>> any_list = ['any_thing']
    >>> lookup_associations( any_list)
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection_func)
    # retrieve associations -- one row per FOREIGN KEY column pairing on
    # each named table, straight from the PostgreSQL catalog
    select_statement = (
        'SELECT \n'
        '   t_base.table_name as "table" --table name \n'
        '  --,c.conkey \n'
        '  ,a_base.attname as "table_field" --table field containing keys to be looked up \n'
        '  ,t_ref.table_schema as "ref_table_schema" --schema of referenced table \n'
        '  ,t_ref.table_name as "ref_table" --referenced table name \n'
        '  --,c.confkey \n'
        '  ,a_ref.attname as "ref_table_field" --referenced table column containing the keys \n'
        '  ,pg_catalog.pg_get_constraintdef(c.oid, true) as condef --pretty constraint text \n'
        'FROM pg_catalog.pg_constraint c \n'
        '  inner join information_schema.tables t_base \n'
        '''    on c.conrelid = (t_base.table_schema||'."'||t_base.table_name||'"')::regclass \n'''
        '  inner join pg_attribute a_base \n'
        '    on c.conrelid = a_base.attrelid \n'
        '    AND a_base.attnum = ANY(c.conkey) \n'
        '  inner join information_schema.tables t_ref \n'
        '''    on c.confrelid = (t_ref.table_schema||'."'||t_ref.table_name||'"')::regclass \n'''
        '  inner join pg_attribute a_ref \n'
        '    on c.confrelid = a_ref.attrelid \n'
        '    AND a_ref.attnum = ANY(c.confkey) \n'
        '''WHERE c.contype = 'f' --Get only FOREIGN key constraints \n'''
        '  and t_base.table_name = %s \n')
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        # build list of association dicts
        associations = []
        for name in table_names:
            result = connection.execute(select_statement, name)
            for row in result:
                ref_table, ref_field = row['ref_table'], row['ref_table_field']
                # locals renamed to avoid shadowing the module-level 'table'
                # import used by sibling functions
                base_table, base_field = row['table'], row['table_field']
                new_association = {
                    'parent': ref_table,
                    'parent_column': ref_field,
                    'table': base_table,
                    'column': base_field,
                    'type': default_type
                }
                association.validate(new_association)
                associations.append(new_association)
        if lookup_roles:
            # check for Dimensional aliases (Roles)
            roles_tuple = dto_util.get_roles(associations)
            role_tables, replacement_associations, role_associations = roles_tuple
            if replacement_associations:
                # replace detected assoc.s with new role-aware versions, by
                # keying every detected association on (table, column) and
                # overlaying the replacements
                detected_assocs_by_table_column_tuple = {
                    (assoc['table'], assoc['column']): assoc
                    for assoc in associations}
                detected_assocs_by_table_column_tuple.update(
                    replacement_associations)
                associations = list(
                    detected_assocs_by_table_column_tuple.values())
                # add additional associations,relating the detected Dimension
                #"roles" back to their base dimensions.
                associations.extend(role_associations)
        return associations
    finally:
        connection.close()
def get( db_url=None, connection_func=None):
    """
    Retrieve the current list of tables from the warehouse support schema.

    Keyword Parameters:
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection_func  --  function returning SQLAlchemy connections
      (Optional, if provided, will override db_url)

    Exceptions:
    ConnectionMissingArgument  --  raised if neither connection or db_url
      parameter is specified.
    ValidateException  --  raised when a problem is encountered validating
      a dto

    >>> get()
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection( db_url, connection_func)
    # prepare statement, to get table metadata
    select_statement = (
        'SELECT \n'
        '   t.name \n'
        '  ,tt.type_name as type \n'
        '  ,t.num_rows as rows \n'
        '  ,t.aud_beg_dtm as updated \n'
        '  ,t.data_years as years \n'
        '  ,pt.name as project \n'
        '  ,ct.info as contact \n'
        '  ,t.is_selectable as selectable \n'
        '  ,t.inport_entity_id as inport_id \n'
        '  ,t.inport_replacement_project_id \n'
        '  ,t.description \n'
        '  ,t.title \n'
        '  ,t.csw_uuid AS uuid \n'
        '  ,tuf.iso_maintenance_update_code AS update_frequency \n'
        '  ,tuc.gmd_code AS restriction \n'
        '  ,t.usage_notice \n'
        '  ,t.keywords \n'
        '  ,t.north_bound \n'
        '  ,t.east_bound \n'
        '  ,t.south_bound \n'
        '  ,t.west_bound \n'
        '  ,t.is_sensitive as confidential \n'
        'FROM {schema}.{table} t \n'
        '  INNER JOIN {schema}.{type_table} tt \n'
        '    ON t.table_type_id = tt.table_type_id \n'
        '  INNER JOIN {schema}.{project_table} pt \n'
        '    ON t.project_id = pt.project_id \n'
        '  INNER JOIN {schema}.{contact_table} ct \n'
        '    ON t.contact_id = ct.contact_id \n'
        '  INNER JOIN {schema}.table_update_frequency tuf \n'
        '    ON t.table_update_frequency_id = tuf.table_update_frequency_id \n'
        '  INNER JOIN {schema}.table_use_constraint tuc \n'
        '    ON t.table_use_constraint_id = tuc.table_use_constraint_id \n'
    ).format(schema=dto_util.SCHEMA, table=TABLE,
             contact_table=contact.TABLE,
             type_table=table_type.TABLE,
             project_table=project.TABLE)
    # try/finally guarantees connection cleanup; exceptions propagate as-is.
    try:
        result = connection.execute( select_statement)
        tables = []
        for row in result:
            # local renamed from 'table' to avoid shadowing the module-level
            # 'table' import; make a real dict, so we can pprint() etc.
            table_dto = dict(row)
            # condense SQL spatial bounds fields into a simple string
            sql_bounds_values = (table_dto[field] for field in sql_bounds_fields)
            table_dto['bounds'] = '{}, {}, {}, {}'.format(*sql_bounds_values)
            for key in sql_bounds_fields: #remove the SQL fields
                del table_dto[key]
            validate(table_dto)
            tables.append(table_dto)
        return tables
    finally:
        connection.close()
def get_by_lookup(table_name, column_name, db_url=None, connection=None):
    """
    Return the name of the Python type constructor, that corresponds to
    the referenced table & column.

    Keyword Parameters:
    table_name  --  String representing name of the warehouse table, where
      column is located
    column_name  --  String representing name of the warehouse column who's
      Python type constructor name is to be returned.
    db_url  --  String, representing a SQLAlchemy connection (Required, if
      parameter 'connection' is not provided.
    connection  --  SQLAlchemy connection (Optional, if provided, will
      override db_url)

    Exceptions:
    ConnectionMissingArguments  --  raised if neither connection or db_url
      parameter is specified.
    LookupNullType  --  raised when the column's database type cannot be
      mapped to a Python type (e.g., a PostGIS type SQLAlchemy reflects
      as NullType).

    >>> get_by_lookup( 'any_table', 'some_field')
    Traceback (most recent call last):
       ...
    api.resources.source.warehouse.support.dto_util.ConnectionMissingArgument
    """
    # get db connection
    connection = dto_util.get_connection(db_url, connection)
    # fetch table metadata; try/finally guarantees the connection is closed
    try:
        # obtain python type of column via SQLAlchemy reflection
        metadata = sqlalchemy.MetaData() #any needed lookup Tables will be
        # listed in metadata.tables
        reflected_table = sqlalchemy.Table(
            table_name, metadata,
            autoload=True # reflect columns, as defined in the db
            , autoload_with=connection, schema=discover.SCHEMA)
        for column in reflected_table.columns:
            if column.name == column_name:
                try:
                    constructor = column.type.python_type
                except NotImplementedError:
                    if isinstance(column.type, sqlalchemy.sql.sqltypes.NullType):
                        msg = ("Unknown database type [maybe PostGIS?]."
                               " Table$Column: '{}${}'").format(
                                   table_name, column_name)
                        raise LookupNullType(msg)
                    raise
                # match SQLAlchemy constructor, to the one we use internally
                python_type_constructor = None
                if issubclass(constructor, int):
                    python_type_constructor = 'int'
                #FIXME: transition away from float.. Decimal is more user friendly
                if issubclass(constructor, (float, decimal.Decimal)):
                    python_type_constructor = 'float'
                if issubclass(constructor, str):
                    python_type_constructor = 'str'
                if issubclass(constructor, (datetime.datetime, datetime.time)):
                    python_type_constructor = 'datetime.datetime'
                if python_type_constructor is None:
                    msg = "Unable to map '{}' to Warehouse type".format(
                        constructor)
                    raise Exception(msg) #TODO: refactor into custom class
                validate(python_type_constructor)
                return python_type_constructor
        # NOTE(review): reconstructed as a for/else-style fallthrough --
        # raising only when no column matched. An if/else reading of the
        # original would raise on the first non-matching column, which
        # contradicts the error message's intent; confirm against VCS history.
        msg = "No columns defined, for table '{}'".format(table_name)
        raise Exception(msg) #TODO: refactor into custom class
    finally:
        connection.close()