示例#1
0
def insert_table(conn_id, schema, src_table, dest_table):
    """Copy ``(relname, n_live_tup)`` rows from ``src_table`` into ``dest_table``.

    Selects ``relname`` and ``n_live_tup`` from ``src_table`` for every row
    whose ``schemaname`` equals ``schema`` and whose ``relname`` differs from
    ``dest_table``, then bulk-inserts the fetched rows into ``dest_table``
    and commits.

    Args:
        conn_id: Connection id passed to ``PostgresHook``.
        schema: Value compared against the ``schemaname`` column.
        src_table: Relation to read from. It is interpolated into the SQL
            text verbatim, so it must come from trusted configuration.
            (Presumably a statistics view such as ``pg_stat_user_tables``;
            confirm against the caller.)
        dest_table: Table that receives the rows. Its name is also excluded
            from the selected rows. Interpolated verbatim into the INSERT.
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)

    # Identifiers cannot be bound as query parameters, so the table names are
    # still formatted in; the comparison *values* are bound by the driver.
    select_cmd = """SELECT relname, n_live_tup from {} where schemaname = %s
    and relname != %s;""".format(src_table)
    insert_cmd = "INSERT INTO {} VALUES %s".format(dest_table)

    # Both reads and writes go through the same hook, so a single connection
    # is enough; it is always closed, even when a statement fails.
    conn = pg_hook.get_conn()
    try:
        with conn.cursor() as cursor:
            cursor.execute(select_cmd, (schema, dest_table))
            records = cursor.fetchall()
            execute_values(cursor, insert_cmd, records)
        conn.commit()
    finally:
        # Closing without a commit discards any uncommitted work.
        conn.close()
示例#2
0
def create_table(conn_id, dest_table):
    """Drop ``dest_table`` if it exists and recreate it empty.

    The recreated table has two columns: ``table_name VARCHAR(50)`` and
    ``row_count INTEGER``. Both statements run on one connection and are
    committed together.

    Args:
        conn_id: Connection id passed to ``PostgresHook``.
        dest_table: Name of the table to (re)create. It is interpolated into
            the DDL verbatim (identifiers cannot be bound as parameters), so
            it must come from trusted configuration.
    """
    pg_hook = PostgresHook(postgres_conn_id=conn_id)
    delete_cmd = """
                DROP TABLE IF EXISTS {};
                 """.format(dest_table)
    create_cmd = """
                 CREATE table {} (
                 table_name VARCHAR(50),
                 row_count INTEGER
                 )
                 """.format(dest_table)

    dest_conn = pg_hook.get_conn()
    try:
        # One cursor serves both statements; it is released on exit.
        with dest_conn.cursor() as cursor:
            cursor.execute(delete_cmd)
            cursor.execute(create_cmd)
        dest_conn.commit()
    finally:
        # Always release the connection; uncommitted work is discarded.
        dest_conn.close()
示例#3
0
class StageS3ToRedshiftOperator(BaseOperator):
    """Load a delimited S3 file into a table via COPY, then stamp load metadata.

    ``execute`` issues a ``COPY`` from ``s3_path`` into ``table`` and then
    sets ``LOAD_DATETIME`` and ``RECORD_SOURCE`` on every row where both
    columns are still NULL. Both statements are committed together.
    """

    @apply_defaults
    def __init__(self,
                 load_datetime,
                 record_source,
                 redshift_conn_id,
                 table,
                 s3_path,
                 s3_role,
                 s3_region,
                 delimiter=',',
                 encoding='utf8',
                 *args,
                 **kwargs):
        """Store the load configuration.

        Args:
            load_datetime: Value written to ``LOAD_DATETIME`` on new rows.
            record_source: Value written to ``RECORD_SOURCE`` on new rows.
            redshift_conn_id: Connection id passed to ``PostgresHook``.
            table: Target table of the COPY and the UPDATE.
            s3_path: Source location used in the COPY ``FROM`` clause.
            s3_role: Role used in the COPY ``IAM_ROLE`` clause.
            s3_region: Region used in the COPY ``REGION`` clause.
            delimiter: Field delimiter used in the COPY ``DELIMITER`` clause.
            encoding: File encoding used in the COPY ``ENCODING`` clause.
            *args: Forwarded to ``BaseOperator``.
            **kwargs: Forwarded to ``BaseOperator``.
        """
        self.load_datetime = load_datetime
        self.record_source = record_source
        self.redshift_conn_id = redshift_conn_id
        self.table = table
        self.s3_path = s3_path
        self.s3_role = s3_role
        self.s3_region = s3_region
        self.delimiter = delimiter
        self.encoding = encoding

        super().__init__(*args, **kwargs)

    @staticmethod
    def _run_statement(conn, statement, parameters=None):
        """Execute one statement on a fresh cursor and always close the cursor."""
        cursor = conn.cursor()
        try:
            cursor.execute(statement, parameters)
        finally:
            cursor.close()

    def execute(self, context):
        """Run the COPY and the metadata UPDATE in a single transaction.

        Args:
            context: Execution context supplied by the caller; not used here.
        """
        self.hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        conn = self.hook.get_conn()
        log.info("Connected with " + self.redshift_conn_id)

        table = self.table
        load_datetime = self.load_datetime

        # The COPY options come from operator configuration and are
        # interpolated as-is. The option keyword is ENCODING (it was
        # misspelled before, which made the statement invalid).
        # NOTE(review): the Redshift COPY reference shows the encoding as a
        # bare keyword (e.g. ENCODING UTF8), so it is emitted unquoted --
        # confirm against the cluster.
        copy_statement = f"""
        COPY {table}
        FROM '{self.s3_path}'
        IAM_ROLE '{self.s3_role}'
        REGION '{self.s3_region}'
        DELIMITER '{self.delimiter}'
        ENCODING {self.encoding}
        """

        # Values are bound by the driver instead of being spliced into the
        # SQL text; the previous hand-written quoting was unbalanced.
        meta_statement = f"""
        UPDATE {table}
        SET LOAD_DATETIME = %s,
        RECORD_SOURCE = %s
        WHERE LOAD_DATETIME IS NULL
        AND RECORD_SOURCE IS NULL
        """

        try:
            self._run_statement(conn, copy_statement)
            # Lazy %-formatting also works when load_datetime is not a str.
            log.info("Loaded completed with load_datetime %s", load_datetime)

            self._run_statement(conn, meta_statement,
                                (load_datetime, self.record_source))
            log.info("Metadata injection completed with load_datetime %s",
                     load_datetime)

            conn.commit()
        finally:
            # Closing without a commit discards the transaction, so a failed
            # UPDATE does not leave a half-stamped load behind.
            conn.close()