def insert_table(conn_id, schema, src_table, dest_table):
    """Copy ``(relname, n_live_tup)`` rows from ``src_table`` into ``dest_table``.

    Selects ``relname`` and ``n_live_tup`` from ``src_table`` for every row
    whose ``schemaname`` equals ``schema`` and whose ``relname`` differs from
    ``dest_table``, bulk-inserts the fetched rows into ``dest_table`` and
    commits.

    Args:
        conn_id: Airflow connection id passed to ``PostgresHook``.
        schema: Value matched against the ``schemaname`` column.
        src_table: Relation to read from. Interpolated into the SQL text as an
            identifier, so it must come from trusted configuration.
            NOTE(review): presumably a ``pg_stat_*`` statistics view - confirm
            against the caller.
        dest_table: Relation to insert into, also excluded from the selected
            rows by name. Interpolated as an identifier; must be trusted.
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    # Identifiers cannot be bound as query parameters, so the relation names
    # are still formatted in; the comparison values are bound by the driver.
    select_cmd = (
        "SELECT relname, n_live_tup from {} "
        "where schemaname = %s and relname != %s;"
    ).format(src_table)
    insert_cmd = "INSERT INTO {} VALUES %s".format(dest_table)

    # One connection is enough: source and destination use the same conn_id.
    conn = pg_hook.get_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(select_cmd, (schema, dest_table))
            records = cursor.fetchall()
            execute_values(cursor, insert_cmd, records)
        conn.commit()
    finally:
        # Closing without a commit discards any partial work on failure.
        conn.close()
def create_table(conn_id, dest_table):
    """Drop ``dest_table`` if it exists and recreate it empty.

    The new table has two columns: ``table_name VARCHAR(50)`` and
    ``row_count INTEGER``. Both statements run on one connection and are
    committed together.

    Args:
        conn_id: Airflow connection id passed to ``PostgresHook``.
        dest_table: Name of the table to (re)create. Interpolated into the
            SQL text as an identifier, so it must come from trusted
            configuration.
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    delete_cmd = """
        DROP TABLE IF EXISTS {};
    """.format(dest_table)
    # NOTE(review): if this table receives PostgreSQL relation names and
    # live-tuple counts, VARCHAR(50)/INTEGER may be too narrow (names can be
    # up to 63 bytes, counts are bigint) - confirm before widening.
    create_cmd = """
        CREATE table {} (
            table_name VARCHAR(50),
            row_count INTEGER
        )
    """.format(dest_table)

    dest_conn = pg_hook.get_conn()
    try:
        with dest_conn.cursor() as cursor:
            cursor.execute(delete_cmd)
            cursor.execute(create_cmd)
        dest_conn.commit()
    finally:
        # Always release the connection; an uncommitted drop is rolled back.
        dest_conn.close()
class StageS3ToRedshiftOperator(BaseOperator):
    """Load a delimited S3 file into a table with COPY, then stamp load metadata.

    ``execute`` runs a ``COPY`` from ``s3_path`` into ``table`` and then sets
    ``LOAD_DATETIME`` and ``RECORD_SOURCE`` on every row where both columns
    are still NULL. Both statements are committed together.
    """

    @apply_defaults
    def __init__(self, load_datetime, record_source, redshift_conn_id, table,
                 s3_path, s3_role, s3_region, delimiter=',', encoding='utf8',
                 *args, **kwargs):
        """Store the operator configuration.

        Args:
            load_datetime: Value written to the ``LOAD_DATETIME`` column.
            record_source: Value written to the ``RECORD_SOURCE`` column.
            redshift_conn_id: Airflow connection id passed to ``PostgresHook``.
            table: Target table; interpolated into SQL as an identifier.
            s3_path: Source location used in the COPY ``FROM`` clause.
            s3_role: Role used in the COPY ``IAM_ROLE`` clause.
            s3_region: Region used in the COPY ``REGION`` clause.
            delimiter: Field delimiter used in the COPY ``DELIMITER`` clause.
            encoding: File encoding used in the COPY ``ENCODING`` clause.
            *args: Forwarded to ``BaseOperator``.
            **kwargs: Forwarded to ``BaseOperator``.
        """
        self.load_datetime = load_datetime
        self.record_source = record_source
        self.redshift_conn_id = redshift_conn_id
        self.table = table
        self.s3_path = s3_path
        self.s3_role = s3_role
        self.s3_region = s3_region
        self.delimiter = delimiter
        self.encoding = encoding
        super().__init__(*args, **kwargs)

    def execute(self, context):
        """Run the COPY and the metadata UPDATE in one transaction.

        Args:
            context: Airflow task context (unused).
        """
        self.hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        conn = self.hook.get_conn()
        # Lazy %s formatting also works when the value is not a str.
        log.info("Connected with %s", self.redshift_conn_id)

        # COPY options come from trusted DAG configuration and are inlined.
        # ENCODING takes a bare keyword value (e.g. UTF8), not a quoted string.
        copy_statement = f"""
            COPY {self.table}
            FROM '{self.s3_path}'
            IAM_ROLE '{self.s3_role}'
            REGION '{self.s3_region}'
            DELIMITER '{self.delimiter}'
            ENCODING {self.encoding}
        """
        # The metadata values are bound by the driver; the table name is an
        # identifier and cannot be a bound parameter.
        meta_statement = f"""
            UPDATE {self.table}
            SET LOAD_DATETIME = %s,
                RECORD_SOURCE = %s
            WHERE LOAD_DATETIME IS NULL
              AND RECORD_SOURCE IS NULL
        """

        try:
            with conn.cursor() as cursor:
                cursor.execute(copy_statement)
            log.info("Loaded completed with load_datetime %s", self.load_datetime)

            with conn.cursor() as cursor:
                cursor.execute(
                    meta_statement, (self.load_datetime, self.record_source)
                )
            log.info(
                "Metadata injection completed with load_datetime %s",
                self.load_datetime,
            )
            conn.commit()
        finally:
            # Closing without a commit discards a half-finished load.
            conn.close()