def define_tables(cls, metadata):
    """Define two related test tables whose names are forcibly quoted.

    ``t2.t1id`` references ``t1.id`` so reflection tests can verify that
    foreign keys resolve across case-sensitive (quoted) table names.
    """
    Table(
        quoted_name('t1', quote=True),
        metadata,
        Column('id', Integer, primary_key=True),
    )
    Table(
        quoted_name('t2', quote=True),
        metadata,
        Column('id', Integer, primary_key=True),
        Column('t1id', ForeignKey('t1.id')),
    )
def test_named_alias_disable_quote(self):
    """CTE and alias names created with quote=False must render unquoted."""
    unquoted_cte = select([literal(1).label("id")]).cte(
        name=quoted_name('CTE', quote=False)
    )
    unquoted_alias = select([unquoted_cte.c.id]).alias(
        name=quoted_name("DontQuote", quote=False)
    )
    stmt = select([unquoted_alias])
    self.assert_compile(
        stmt,
        'WITH CTE AS (SELECT :param_1 AS id) '
        'SELECT DontQuote.id FROM '
        '(SELECT CTE.id AS id FROM CTE) AS DontQuote'
    )
def view(name, metadata, selectable):
    """Build a view-like table construct backed by *selectable*.

    Proxies the selectable's columns onto a lightweight ``table`` object and
    registers CREATE/DROP VIEW DDL against *metadata* lifecycle events.
    Returns the table construct.
    """
    qualified = metadata.schema + "." + name if metadata.schema else name

    view_table = table(quoted_name(name, None))
    view_table.metadata = metadata
    view_table.fullname = qualified
    view_table.schema = quoted_name(metadata.schema, None)

    # Expose the selectable's columns on the view construct.
    for column in selectable.c:
        column._make_proxy(view_table)

    CreateView(view_table, selectable).execute_at('after-create', metadata)
    DropView(view_table).execute_at('before-drop', metadata)
    return view_table
def test_pickle_anon_label(self):
    """_anonymous_label survives pickling with type and quote flag intact."""
    original = _anonymous_label(quoted_name("x", True))
    for loads, dumps in picklers():
        restored = loads(dumps(original))
        assert isinstance(restored, _anonymous_label)
        eq_(str(original), str(restored))
        eq_(original.quote, restored.quote)
def normalize_name(self, name):
    """Normalize a DB-native identifier for SQLAlchemy's conventions.

    All-uppercase names that need no quoting are folded to lowercase;
    all-lowercase names are wrapped as forcibly-quoted names (they were
    case-sensitive on the DB side); mixed-case names pass through as-is.
    """
    if name is None:
        return None

    is_all_upper = name.upper() == name
    if is_all_upper and not self.identifier_preparer._requires_quotes(
            name.lower()):
        return name.lower()
    if name.lower() == name:
        return quoted_name(name, quote=True)
    return name
def test_reflect_lowercase_forced_tables(self):
    """Reflection resolves the t2->t1 FK across forced-quote table names."""
    meta_direct = MetaData(testing.db)
    t2_reflected = Table(quoted_name('t2', quote=True), meta_direct,
                         autoload=True)
    t1_reflected = meta_direct.tables['t1']
    assert t2_reflected.c.t1id.references(t1_reflected.c.id)

    # Same check via bulk reflection with a name filter.
    meta_bulk = MetaData(testing.db)
    meta_bulk.reflect(only=lambda name, m: name.lower() in ('t1', 't2'))
    assert meta_bulk.tables['t2'].c.t1id.references(
        meta_bulk.tables['t1'].c.id)
def normalize_name(self, name):
    """Normalize a Firebird identifier for SQLAlchemy's conventions.

    Firebird stores names in a CHAR() column padded with spaces, so trailing
    whitespace is stripped first.  All-uppercase names that need no quoting
    fold to lowercase; all-lowercase names become forcibly-quoted; mixed-case
    names pass through unchanged.
    """
    # Remove trailing spaces: FB uses a CHAR() type,
    # that is padded with spaces
    if name:
        name = name.rstrip()

    if name is None:
        return None
    if name.upper() == name and not \
            self.identifier_preparer._requires_quotes(name.lower()):
        return name.lower()
    if name.lower() == name:
        return quoted_name(name, quote=True)
    return name
def test_quoted_name_bindparam_ok(self):
    """A quoted_name bound as a literal round-trips as a plain string."""
    from sqlalchemy.sql.elements import quoted_name

    with testing.db.connect() as conn:
        stmt = select(
            [cast(literal(quoted_name("some_name", False)), String)]
        )
        eq_(conn.scalar(stmt), "some_name")
def check_db_version(self, db_version_info, session=None):
    """
    Checks the database version and prints an error message on database
    version mismatch.
     - On mismatching or on missing version a sys.exit(1) is called.
     - On missing DBVersion table, it returns False
     - On compatible DB version, it returns True

    Parameters:
        db_version_info (db_version.DBVersionInfo): required database
            version.
        session: an open database session or None. If session is None, a
            new session is created.
    """
    # BUGFIX: these were previously initialized inside the try block, so a
    # failure in create_engine()/CreateSession() left session=None and the
    # finally clause raised AttributeError on session.commit(), masking the
    # original error.  Initialize before the try and guard cleanup.
    dispose_engine = False
    engine = None
    try:
        if session is None:
            engine = SQLServer.create_engine(self.get_connection_string())
            dispose_engine = True  # we own this engine; dispose it at exit
            session = CreateSession(engine)
        else:
            engine = session.get_bind()

        if not engine.has_table(quoted_name(DBVersion.__tablename__, True)):
            LOG.debug("Missing DBVersion table!")
            return False

        version = session.query(DBVersion).first()
        if version is None:
            # Version is not populated yet
            LOG.error('No version information found in the database.')
            sys.exit(1)
        elif not db_version_info.is_compatible(version.major, version.minor):
            LOG.error('Version mismatch. Expected database version: ' +
                      str(db_version_info))
            version_from_db = 'v' + str(version.major) + '.' + str(
                version.minor)
            LOG.error('Version from the database is: ' + version_from_db)
            LOG.error('Please update your database.')
            sys.exit(1)

        LOG.debug("Database version is compatible.")
        return True
    finally:
        # Only commit/dispose resources that were actually acquired.
        if session is not None:
            session.commit()
        if dispose_engine and engine is not None:
            engine.dispose()
def normalize_name(self, name):
    """
    Converting EXASol case insensitive identifiers (upper case)
    to  SQLAlchemy case insensitive identifiers (lower case)
    """
    if name is None:
        return None

    # Under Python 2, byte strings are decoded with the dialect encoding
    # before case analysis.
    if six.PY2 and isinstance(name, str):
        name = name.decode(self.encoding)

    if name.upper() == name and not \
            self.identifier_preparer._requires_quotes(name.lower()):
        return name.lower()
    if name.lower() == name:
        return quoted_name(name, quote=True)
    return name
def test_rconcat_anon(self):
    """Right-concatenation onto an anonymous label keeps type and quoting."""
    anon = _anonymous_label(quoted_name("x", True))
    assert isinstance(anon, _anonymous_label)
    combined = "y" + anon
    assert isinstance(combined, _anonymous_label)
    self._assert_quoted(combined, True)
def test_rconcat_quotenone(self):
    """Right-concatenating onto quote=None yields an unquoted result."""
    base = quoted_name("x", None)
    self._assert_not_quoted("y" + base)
def test_apply_map_plain(self):
    """apply_map interpolates into an unquoted anonymous label."""
    template = _anonymous_label(quoted_name("x%s", None))
    mapped = template.apply_map("bar")
    eq_(mapped, "xbar")
    self._assert_not_quoted(mapped)
def get_batch(self, batch_kwargs, batch_parameters=None):
    """Construct a Batch from ``batch_kwargs`` describing a table or a query.

    Exactly one of ``"table"`` or ``"query"`` must be present in
    ``batch_kwargs``; otherwise ValueError is raised.

    Args:
        batch_kwargs: dict describing what to load.  Recognized keys include
            "table"/"query", "schema", "limit", "offset", "use_quoted_name",
            "query_parameters", "snowflake_transient_table",
            "bigquery_temp_table" (deprecated).
        batch_parameters: opaque parameters recorded on the returned Batch.

    Returns:
        A Batch wrapping a SqlAlchemyBatchReference for the requested data.
    """
    # We need to build a batch_id to be used in the dataframe
    batch_markers = BatchMarkers(
        {
            "ge_load_time": datetime.datetime.now(
                datetime.timezone.utc).strftime("%Y%m%dT%H%M%S.%fZ")
        }
    )

    if "bigquery_temp_table" in batch_kwargs:
        # deprecated-v0.15.3
        warnings.warn(
            "BigQuery tables that are created as the result of a query are no longer created as "
            "permanent tables. Thus, a named permanent table through the `bigquery_temp_table`"
            "parameter is not required. The `bigquery_temp_table` parameter is deprecated as of"
            "v0.15.3 and will be removed in v0.18.",
            DeprecationWarning,
        )

    if "snowflake_transient_table" in batch_kwargs:
        # Snowflake can use either a transient or temp table, so we allow a table_name to be provided
        query_support_table_name = batch_kwargs.get(
            "snowflake_transient_table")
    else:
        query_support_table_name = None

    if "query" in batch_kwargs:
        if "limit" in batch_kwargs or "offset" in batch_kwargs:
            logger.warning(
                "Limit and offset parameters are ignored when using query-based batch_kwargs; consider "
                "adding limit and offset directly to the generated query.")
        if "query_parameters" in batch_kwargs:
            query = Template(batch_kwargs["query"]).safe_substitute(
                batch_kwargs["query_parameters"])
        else:
            query = batch_kwargs["query"]
        batch_reference = SqlAlchemyBatchReference(
            engine=self.engine,
            query=query,
            table_name=query_support_table_name,
            schema=batch_kwargs.get("schema"),
        )
    elif "table" in batch_kwargs:
        table = batch_kwargs["table"]
        if batch_kwargs.get("use_quoted_name"):
            table = quoted_name(table, quote=True)
        limit = batch_kwargs.get("limit")
        offset = batch_kwargs.get("offset")
        if limit is not None or offset is not None:
            logger.info(
                "Generating query from table batch_kwargs based on limit and offset"
            )
            # In BigQuery the table name is already qualified with its schema name
            if self.engine.dialect.name.lower() == "bigquery":
                schema = None
            else:
                schema = batch_kwargs.get("schema")
            # limit doesn't compile properly for oracle, so we will append
            # rownum to the query string later
            if self.engine.dialect.name.lower() == "oracle":
                raw_query = sqlalchemy.select(
                    [sqlalchemy.text("*")]).select_from(
                        sqlalchemy.schema.Table(table, sqlalchemy.MetaData(),
                                                schema=schema))
            else:
                raw_query = (sqlalchemy.select(
                    [sqlalchemy.text("*")]).select_from(
                        sqlalchemy.schema.Table(
                            table, sqlalchemy.MetaData(),
                            schema=schema)).offset(offset).limit(limit))
            query = str(
                raw_query.compile(self.engine,
                                  compile_kwargs={"literal_binds": True}))
            # use rownum instead of limit in oracle.
            # BUGFIX: only append the ROWNUM predicate when a limit was
            # actually given -- previously '"%d" % limit' raised TypeError
            # when this branch was entered with only an offset (limit=None).
            # NOTE(review): offset is still ignored on oracle -- confirm
            # whether that is intended.
            if self.engine.dialect.name.lower() == "oracle" and \
                    limit is not None:
                query += "\nWHERE ROWNUM <= %d" % limit
            batch_reference = SqlAlchemyBatchReference(
                engine=self.engine,
                query=query,
                table_name=query_support_table_name,
                schema=batch_kwargs.get("schema"),
            )
        else:
            batch_reference = SqlAlchemyBatchReference(
                engine=self.engine,
                table_name=table,
                schema=batch_kwargs.get("schema"),
            )
    else:
        raise ValueError(
            "Invalid batch_kwargs: exactly one of 'table' or 'query' must be specified"
        )

    return Batch(
        datasource_name=self.name,
        batch_kwargs=batch_kwargs,
        data=batch_reference,
        batch_parameters=batch_parameters,
        batch_markers=batch_markers,
        data_context=self._data_context,
    )
def test_pickle_quote(self):
    """quoted_name round-trips through every pickle protocol."""
    original = quoted_name("x", True)
    for loads, dumps in picklers():
        restored = loads(dumps(original))
        eq_(str(original), str(restored))
        eq_(original.quote, restored.quote)
def test_apply_map_quoted(self):
    """apply_map interpolates into a quoted anonymous label, keeping quote."""
    template = _anonymous_label(quoted_name("x%s", True))
    mapped = template.apply_map("bar")
    eq_(mapped, "xbar")
    eq_(mapped.quote, True)
def test_coerce_quoted_retain(self):
    """Re-wrapping with the same quote flag keeps that flag."""
    inner = quoted_name("x", False)
    outer = quoted_name(inner, False)
    eq_(outer.quote, False)
def test_coerce_none(self):
    """quoted_name(None, ...) coerces to None rather than a name object."""
    result = quoted_name(None, False)
    eq_(result, None)
def test_rconcat_quotefalse(self):
    """Right-concatenating onto quote=False yields an unquoted result."""
    base = quoted_name("x", False)
    self._assert_not_quoted("y" + base)
def test_rconcat_quotetrue(self):
    """Right-concatenating onto quote=True still yields an unquoted result."""
    base = quoted_name("x", True)
    self._assert_not_quoted("y" + base)
def test_coerce_quoted_switch(self):
    """Re-wrapping with a different explicit quote flag switches it."""
    inner = quoted_name("x", False)
    outer = quoted_name(inner, True)
    eq_(outer.quote, True)
def test_coerce_quoted_none(self):
    """Re-wrapping with quote=None preserves the inner flag."""
    inner = quoted_name("x", False)
    outer = quoted_name(inner, None)
    eq_(outer.quote, False)
def get_compute_domain(
    self,
    domain_kwargs: Dict,
    domain_type: Union[str, MetricDomainTypes],
    accessor_keys: Optional[Iterable[str]] = None,
) -> Tuple[Select, dict, dict]:
    """Uses a given batch dictionary and domain kwargs to obtain a SqlAlchemy column object.

    Args:
        domain_kwargs (dict) - A dictionary consisting of the domain kwargs specifying which data to obtain
        domain_type (str or MetricDomainTypes) - an Enum value indicating which metric domain the user would
        like to be using, or a corresponding string value representing it. String types include "identity", "column",
        "column_pair", "table" and "other". Enum types include capitalized versions of these from the class
        MetricDomainTypes.
        accessor_keys (str iterable) - keys that are part of the compute domain but should be ignored when describing
        the domain and simply transferred with their associated values into accessor_domain_kwargs.

    Returns:
        SqlAlchemy column

    Returns a 3-tuple: (selectable to compute over, the remaining
    compute_domain_kwargs, and accessor_domain_kwargs extracted from them).
    """
    # Extracting value from enum if it is given for future computation
    domain_type = MetricDomainTypes(domain_type)

    batch_id = domain_kwargs.get("batch_id")
    if batch_id is None:
        # We allow no batch id specified if there is only one batch
        if self.active_batch_data:
            data_object = self.active_batch_data
        else:
            raise GreatExpectationsError(
                "No batch is specified, but could not identify a loaded batch."
            )
    else:
        if batch_id in self.loaded_batch_data_dict:
            data_object = self.loaded_batch_data_dict[batch_id]
        else:
            raise GreatExpectationsError(
                f"Unable to find batch with batch_id {batch_id}"
            )

    # compute_domain_kwargs is mutated below (keys popped into
    # accessor_domain_kwargs), so work on a deep copy of the input.
    compute_domain_kwargs = copy.deepcopy(domain_kwargs)
    accessor_domain_kwargs = dict()

    if "table" in domain_kwargs and domain_kwargs["table"] is not None:
        # TODO: Add logic to handle record_set_name once implemented
        # (i.e. multiple record sets (tables) in one batch
        if domain_kwargs["table"] != data_object.selectable.name:
            # NOTE(review): sa.Table's keyword for a schema is normally
            # "schema", not "schema_name" -- confirm this call is correct.
            selectable = sa.Table(
                domain_kwargs["table"],
                sa.MetaData(),
                schema_name=data_object._schema_name,
            )
        else:
            selectable = data_object.selectable
    elif "query" in domain_kwargs:
        raise ValueError(
            "query is not currently supported by SqlAlchemyExecutionEngine"
        )
    else:
        selectable = data_object.selectable

    if (
        "row_condition" in domain_kwargs
        and domain_kwargs["row_condition"] is not None
    ):
        # Only the experimental GE condition parser is supported; note that
        # "condition_parser" is read unconditionally here when a
        # row_condition is present.
        condition_parser = domain_kwargs["condition_parser"]
        if condition_parser == "great_expectations__experimental__":
            parsed_condition = parse_condition_to_sqlalchemy(
                domain_kwargs["row_condition"]
            )
            selectable = sa.select(
                "*", from_obj=selectable, whereclause=parsed_condition
            )
        else:
            raise GreatExpectationsError(
                "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser."
            )

    # Warning user if accessor keys are in any domain that is not of type table, will be ignored
    if (
        domain_type != MetricDomainTypes.TABLE
        and accessor_keys is not None
        and len(list(accessor_keys)) > 0
    ):
        logger.warning(
            'Accessor keys ignored since Metric Domain Type is not "table"'
        )

    if domain_type == MetricDomainTypes.TABLE:
        if accessor_keys is not None and len(list(accessor_keys)) > 0:
            # Move accessor keys (and values) out of the compute kwargs.
            for key in accessor_keys:
                accessor_domain_kwargs[key] = compute_domain_kwargs.pop(key)
        if len(domain_kwargs.keys()) > 0:
            # Warn user if kwarg not "normal".
            unexpected_keys: set = set(compute_domain_kwargs.keys()).difference(
                {
                    "batch_id",
                    "table",
                    "row_condition",
                    "condition_parser",
                }
            )
            if len(unexpected_keys) > 0:
                unexpected_keys_str: str = ", ".join(
                    map(lambda element: f'"{element}"', unexpected_keys)
                )
                logger.warning(
                    f'Unexpected key(s) {unexpected_keys_str} found in domain_kwargs for domain type "{domain_type.value}".'
                )
        # TABLE domains return early; all other domain types fall through to
        # the shared return at the end of the function.
        return selectable, compute_domain_kwargs, accessor_domain_kwargs

    # If user has stated they want a column, checking if one is provided, and
    elif domain_type == MetricDomainTypes.COLUMN:
        if "column" in compute_domain_kwargs:
            # Checking if case-sensitive and using appropriate name
            if self.active_batch_data.use_quoted_name:
                accessor_domain_kwargs["column"] = quoted_name(
                    compute_domain_kwargs.pop("column")
                )
            else:
                accessor_domain_kwargs["column"] = compute_domain_kwargs.pop(
                    "column"
                )
        else:
            # If column not given
            raise GreatExpectationsError(
                "Column not provided in compute_domain_kwargs"
            )

    # Else, if column pair values requested
    elif domain_type == MetricDomainTypes.COLUMN_PAIR:
        # Ensuring column_A and column_B parameters provided
        if (
            "column_A" in compute_domain_kwargs
            and "column_B" in compute_domain_kwargs
        ):
            if self.active_batch_data.use_quoted_name:
                # If case matters...
                accessor_domain_kwargs["column_A"] = quoted_name(
                    compute_domain_kwargs.pop("column_A")
                )
                accessor_domain_kwargs["column_B"] = quoted_name(
                    compute_domain_kwargs.pop("column_B")
                )
            else:
                accessor_domain_kwargs["column_A"] = compute_domain_kwargs.pop(
                    "column_A"
                )
                accessor_domain_kwargs["column_B"] = compute_domain_kwargs.pop(
                    "column_B"
                )
        else:
            raise GreatExpectationsError(
                "column_A or column_B not found within compute_domain_kwargs"
            )

    # Checking if table or identity or other provided, column is not specified. If it is, warning the user
    elif domain_type == MetricDomainTypes.MULTICOLUMN:
        if "column_list" in compute_domain_kwargs:
            # If column_list exists
            accessor_domain_kwargs["column_list"] = compute_domain_kwargs.pop(
                "column_list"
            )

    # Filtering if identity
    elif domain_type == MetricDomainTypes.IDENTITY:
        # If we would like our data to become a single column
        if "column" in compute_domain_kwargs:
            if self.active_batch_data.use_quoted_name:
                selectable = sa.select(
                    [sa.column(quoted_name(compute_domain_kwargs["column"]))]
                ).select_from(selectable)
            else:
                selectable = sa.select(
                    [sa.column(compute_domain_kwargs["column"])]
                ).select_from(selectable)
        # If we would like our data to now become a column pair
        elif ("column_A" in compute_domain_kwargs) and (
            "column_B" in compute_domain_kwargs
        ):
            if self.active_batch_data.use_quoted_name:
                selectable = sa.select(
                    [
                        sa.column(quoted_name(compute_domain_kwargs["column_A"])),
                        sa.column(quoted_name(compute_domain_kwargs["column_B"])),
                    ]
                ).select_from(selectable)
            else:
                selectable = sa.select(
                    [
                        sa.column(compute_domain_kwargs["column_A"]),
                        sa.column(compute_domain_kwargs["column_B"]),
                    ]
                ).select_from(selectable)
    else:
        # If we would like our data to become a multicolumn
        if "column_list" in compute_domain_kwargs:
            if self.active_batch_data.use_quoted_name:
                # Building a list of column objects used for sql alchemy selection
                to_select = [
                    sa.column(quoted_name(col))
                    for col in compute_domain_kwargs["column_list"]
                ]
                selectable = sa.select(to_select).select_from(selectable)
            else:
                to_select = [
                    sa.column(col)
                    for col in compute_domain_kwargs["column_list"]
                ]
                selectable = sa.select(to_select).select_from(selectable)

    # Letting selectable fall through
    return selectable, compute_domain_kwargs, accessor_domain_kwargs
def _set_schema_name(s, name):
    """Assign *name* to the schema item, honoring a pending 'quote' kwarg.

    Pops 'quote' out of s.kwargs (so it is consumed exactly once) and wraps
    the name accordingly.
    """
    quote = s.kwargs.pop('quote', None)
    s.name = quoted_name(name, quote)