def execute(self, context) -> None:
    """Run a COPY into ``{schema}.{table}`` using the configured load method.

    The credentials clause is ``aws_iam_role=<role_arn>`` when the AWS
    connection's extras contain a truthy ``role_arn``; otherwise it is built
    by ``build_credentials_block`` from the credentials returned by
    ``S3Hook.get_credentials()``.

    What gets sent to Redshift depends on ``self.method``:

    * ``'REPLACE'`` -- ``BEGIN``, ``DELETE FROM`` the destination, the COPY
      statement, ``COMMIT``.
    * ``'UPSERT'`` -- the COPY targets a staging table named ``#{table}``
      created ``LIKE`` the destination; rows matching on the key columns are
      then deleted from the destination and the staged rows inserted.
    * anything else -- the COPY statement alone.

    :param context: Task execution context; not read by this method.
    :raises AirflowException: For ``'UPSERT'`` when ``upsert_keys`` is empty
        and the hook reports no primary key for the table.
    """
    hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
    aws_connection = S3Hook.get_connection(conn_id=self.aws_conn_id)

    if aws_connection.extra_dejson.get('role_arn', False):
        credentials_block = f"aws_iam_role={aws_connection.extra_dejson['role_arn']}"
    else:
        credentials_block = build_credentials_block(
            S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify).get_credentials()
        )

    copy_options = '\n\t\t\t'.join(self.copy_options)
    destination = f'{self.schema}.{self.table}'

    # UPSERT loads into a staging table first; every other method copies
    # straight into the destination.
    is_upsert = self.method == 'UPSERT'
    copy_destination = f'#{self.table}' if is_upsert else destination
    copy_statement = self._build_copy_query(copy_destination, credentials_block, copy_options)

    sql: Union[list, str]
    if self.method == 'REPLACE':
        sql = [
            "BEGIN;",
            f"DELETE FROM {destination};",
            copy_statement,
            "COMMIT",
        ]
    elif is_upsert:
        # Explicit keys take precedence; otherwise ask the hook for the
        # table's primary key.
        keys = self.upsert_keys or hook.get_table_primary_key(self.table, self.schema)
        if not keys:
            raise AirflowException(
                f"No primary key on {self.schema}.{self.table}. Please provide keys on 'upsert_keys'"
            )
        where_statement = ' AND '.join(
            f'{self.table}.{key} = {copy_destination}.{key}' for key in keys
        )
        sql = [
            f"CREATE TABLE {copy_destination} (LIKE {destination});",
            copy_statement,
            "BEGIN;",
            f"DELETE FROM {destination} USING {copy_destination} WHERE {where_statement};",
            f"INSERT INTO {destination} SELECT * FROM {copy_destination};",
            "COMMIT",
        ]
    else:
        sql = copy_statement

    self.log.info('Executing COPY command...')
    hook.run(sql, autocommit=self.autocommit)
    self.log.info("COPY command complete...")
def execute(self, context: 'Context') -> None:
    """Build the UNLOAD statement and run it through the Redshift hook.

    The credentials clause is ``aws_iam_role=<role_arn>`` when the AWS
    connection's extras contain a truthy ``role_arn``; otherwise it is built
    by ``build_credentials_block`` from the credentials returned by
    ``S3Hook.get_credentials()``. The statement itself comes from
    ``self._build_unload_query``, given the credentials clause,
    ``select_query``, ``s3_key`` and the joined ``unload_options``.

    :param context: Task execution context; not read by this method.
    """
    hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
    aws_connection = S3Hook.get_connection(conn_id=self.aws_conn_id)

    role_arn_configured = aws_connection.extra_dejson.get('role_arn', False)
    if role_arn_configured:
        credentials_block = f"aws_iam_role={aws_connection.extra_dejson['role_arn']}"
    else:
        credentials_block = build_credentials_block(
            S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify).get_credentials()
        )

    joined_options = '\n\t\t\t'.join(self.unload_options)
    statement = self._build_unload_query(
        credentials_block,
        self.select_query,
        self.s3_key,
        joined_options,
    )

    self.log.info('Executing UNLOAD command...')
    hook.run(statement, self.autocommit, parameters=self.parameters)
    self.log.info("UNLOAD command complete...")
def get_hook(self) -> RedshiftSQLHook:
    """Build a new ``RedshiftSQLHook`` for this operator's connection.

    :return: A hook constructed with ``redshift_conn_id=self.redshift_conn_id``.
    """
    hook = RedshiftSQLHook(redshift_conn_id=self.redshift_conn_id)
    return hook