Example #1
    def execute(self, context):
        aws_hook = AwsBaseHook(self.aws_credentials_id, client_type='s3')
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        self.log.info('Date: ' + self.execution_date)
        date = parser.parse(self.execution_date)

        self.log.info("Backfill_data: {}".format(self.backfill_data))
        s3_bucket_key = "s3://{}/{}".format(self.s3_bucket, self.s3_key)
        if self.backfill_data:
            s3_path = s3_bucket_key + '/' + str(date.year) + '/' + str(
                date.month)
        else:
            s3_path = s3_bucket_key
        self.log.info("S3 path: {}".format(s3_path))

        self.log.info("Deleting data from table {}.".format(self.table))

        try:
            redshift.run("DELETE FROM {}".format(self.table))
        except Exception as ex:
            self.log.info("Table {} does not exist: {}".format(self.table, ex))

        copy_sql = self.COPY_SQL.format(self.table, s3_path,
                                        credentials.access_key,
                                        credentials.secret_key, self.region,
                                        self.json_path)
        self.log.info(
            "SQL Statement Executing on Redshift: {}".format(copy_sql))
        redshift.run(copy_sql)
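For reference, a COPY_SQL template consistent with the positional format() arguments above (table, S3 path, access key, secret key, region, JSON path) might look like this sketch; the actual class attribute is not shown in the excerpt:

    # Hypothetical reconstruction of the COPY_SQL class attribute;
    # the placeholder order mirrors the format() call above.
    COPY_SQL = """
        COPY {}
        FROM '{}'
        ACCESS_KEY_ID '{}'
        SECRET_ACCESS_KEY '{}'
        REGION '{}'
        FORMAT AS JSON '{}'
    """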
Example #2
    def execute(self, context):
        aws_hook = AwsHook(self.aws_credentials_id, client_type="redshift")
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        s3_path = "s3://{}/{}/{}".format(self.s3_bucket, self.s3_directory,
                                         self.table["s3"]["key"])
        files_format = self.table["s3"]["format"]
        delimiter = self.table["s3"]["delimiter"]
        ignoreheader = self.table["s3"]["ignoreheader"]
        delimiter_text = ''

        if delimiter:
            delimiter_text = f"delimiter '{delimiter}'"

        self.log.info(
            f"Start copying data from {s3_path} to table {self.table['name']}"
        )

        redshift.run(f"""
        COPY {self.table["name"]}
        FROM '{s3_path}'
        ACCESS_KEY_ID '{credentials.access_key}'
        SECRET_ACCESS_KEY '{credentials.secret_key}'
        {files_format}
        {delimiter_text}
        {ignoreheader}
        """)
Example #3
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
        credentials = s3_hook.get_credentials()
        copy_options = '\n\t\t\t'.join(self.copy_options)

        copy_query = """
            COPY {schema}.{table}
            FROM 's3://{s3_bucket}/{s3_key}'
            with credentials
            'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
            {copy_options};
        """.format(
            schema=self.schema,
            table=self.table,
            s3_bucket=self.s3_bucket,
            s3_key=self.s3_key,
            access_key=credentials.access_key,
            secret_key=credentials.secret_key,
            copy_options=copy_options,
        )

        self.log.info('Executing COPY command...')
        postgres_hook.run(copy_query, self.autocommit)
        self.log.info("COPY command complete...")
Example #4
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
        credentials = s3_hook.get_credentials()
        copy_options = '\n\t\t\t'.join(self.copy_options)

        copy_statement = f"""
            COPY {self.schema}.{self.table}
            FROM 's3://{self.s3_bucket}/{self.s3_key}'
            with credentials
            'aws_access_key_id={credentials.access_key};aws_secret_access_key={credentials.secret_key}'
            {copy_options};
        """

        if self.truncate_table:
            truncate_statement = f'TRUNCATE TABLE {self.schema}.{self.table};'
            sql = f"""
            BEGIN;
            {truncate_statement}
            {copy_statement}
            COMMIT
            """
        else:
            sql = copy_statement

        self.log.info('Executing COPY command...')
        postgres_hook.run(sql, self.autocommit)
        self.log.info("COPY command complete...")
    def execute(self, context):
        """
        Description: This execution function loads data from a
                     CSV file and writes it to Postgres.

        Arguments:
            self: Instance of the class
            context: Context dictionary

        Returns:
            None
        """

        postgres = PostgresHook(postgres_conn_id=self.postgres_conn_id)

        # Truncate table
        self.log.info('Clearing data from Postgres staging table {}'.format(
            self.table))
        trunc_formatted_sql = CSVToPostgresOperator.truncate_sql.format(
            self.table)
        postgres.run(trunc_formatted_sql)

        # Copying data from CSV to Postgres
        self.log.info('Copying data from CSV to Postgres - {}'.format(
            self.table))
        formatted_sql = CSVToPostgresOperator.copy_sql.format(
            self.table, self.path_to_csv, self.delimiter,
            self.additional_params)
        postgres.run(formatted_sql)
        self.log.info('CSVToPostgresOperator for {} completed'.format(
            self.table))
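The truncate_sql and copy_sql templates referenced above are not part of this excerpt; a plausible sketch, assuming Postgres COPY ... FROM ... CSV syntax and the four format() arguments (table, path, delimiter, additional params), could be:

    # Hypothetical class attributes; the actual templates are defined on
    # CSVToPostgresOperator outside this excerpt.
    truncate_sql = "TRUNCATE TABLE {};"
    copy_sql = """
        COPY {}
        FROM '{}'
        DELIMITER '{}'
        CSV {};
    """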
Example #6
def init_db():
    try:
        hook = PostgresHook()
        hook.run(CREATE_QUERY)
        hook.run(LOAD_QUERY)
    except ProgrammingError:
        # Ignore the error if the tables already exist.
        pass
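CREATE_QUERY, LOAD_QUERY, and the DELETE_QUERY used by drop_db() in Example #22 are module-level constants outside the excerpt; purely illustrative stand-ins:

# Purely illustrative stand-ins for the module-level constants.
CREATE_QUERY = "CREATE TABLE IF NOT EXISTS source (id INT, payload TEXT);"
LOAD_QUERY = "INSERT INTO source VALUES (1, 'example');"
DELETE_QUERY = "DROP TABLE IF EXISTS source;"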
Example #7
    def execute(self, context):
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)

        credentials = s3_hook.get_credentials()
        unload_options = '\n\t\t\t'.join(self.unload_options)
        s3_key = '{}/{}_'.format(
            self.s3_key,
            self.table) if self.table_as_file_name else self.s3_key
        select_query = "SELECT * FROM {schema}.{table}".format(
            schema=self.schema, table=self.table)
        unload_query = """
                    UNLOAD ('{select_query}')
                    TO 's3://{s3_bucket}/{s3_key}'
                    with credentials
                    'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
                    {unload_options};
                    """.format(select_query=select_query,
                               s3_bucket=self.s3_bucket,
                               s3_key=s3_key,
                               access_key=credentials.access_key,
                               secret_key=credentials.secret_key,
                               unload_options=unload_options)

        self.log.info('Executing UNLOAD command...')
        postgres_hook.run(unload_query, self.autocommit)
        self.log.info("UNLOAD command complete...")
Example #8
    def execute(self, context):
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        if self.delete_all_rows:
            self.log.info("Deleting all rows from table {}".format(self.table))
            delete_stmt = self.DELETE_SQL.format(self.table)
            self.log.info(delete_stmt)
            redshift.run(delete_stmt)

        insert_stmt = self.INSERT_SQL.format(self.table, self.sql)
        self.log.info("Insert statement for fact {}".format(insert_stmt))
        redshift.run(insert_stmt)
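The DELETE_SQL and INSERT_SQL templates are not included in the excerpt; given the two format() calls above, a minimal sketch would be:

    # Hypothetical templates matching the format() calls above.
    DELETE_SQL = "DELETE FROM {}"
    INSERT_SQL = "INSERT INTO {} {}"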
Example #9
class PostgresOperator(BaseOperator):
    """
    Executes sql code in a specific Postgres database

    :param sql: the sql code to be executed. (templated) Can be a str
        representing a sql statement, a list of str (sql statements), or a
        reference to a template file. Template references are recognized by
        str ending in '.sql'
    :type sql: str or list[str]
    :param postgres_conn_id: The :ref:`postgres conn id <howto/connection:postgres>`
        reference to a specific postgres database.
    :type postgres_conn_id: str
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :type autocommit: bool
    :param parameters: (optional) the parameters to render the SQL query with.
    :type parameters: dict or iterable
    :param database: name of the database which overrides the one defined in the connection
    :type database: str
    """

    template_fields = ('sql', )
    template_fields_renderers = {'sql': 'sql'}
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(
        self,
        *,
        sql: str,
        postgres_conn_id: str = 'postgres_default',
        autocommit: bool = False,
        parameters: Optional[Union[Mapping, Iterable]] = None,
        database: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.database = database
        self.hook = None

    def execute(self, context):
        self.log.info('Executing: %s', self.sql)
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
        for output in self.hook.conn.notices:
            self.log.info(output)
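A minimal usage sketch, assuming a DAG context and an existing 'postgres_default' connection (table DDL is illustrative):

create_pet_table = PostgresOperator(
    task_id='create_pet_table',
    postgres_conn_id='postgres_default',
    sql="""
        CREATE TABLE IF NOT EXISTS pet (
            pet_id SERIAL PRIMARY KEY,
            name VARCHAR NOT NULL
        );
    """,
)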
Example #10
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        conn = S3Hook.get_connection(conn_id=self.aws_conn_id)

        credentials_block = None
        if conn.extra_dejson.get('role_arn', False):
            credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
        else:
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
            credentials = s3_hook.get_credentials()
            credentials_block = build_credentials_block(credentials)

        copy_options = '\n\t\t\t'.join(self.copy_options)
        destination = f'{self.schema}.{self.table}'
        copy_destination = f'#{self.table}' if self.method == 'UPSERT' else destination

        copy_statement = self._build_copy_query(copy_destination,
                                                credentials_block,
                                                copy_options)

        if self.method == 'REPLACE':
            sql = f"""
            BEGIN;
            DELETE FROM {destination};
            {copy_statement}
            COMMIT
            """
        elif self.method == 'UPSERT':
            keys = self.upsert_keys or postgres_hook.get_table_primary_key(
                self.table, self.schema)
            if not keys:
                raise AirflowException(
                    f"No primary key on {self.schema}.{self.table}. Please provide keys on 'upsert_keys'"
                )
            where_statement = ' AND '.join(
                [f'{self.table}.{k} = {copy_destination}.{k}' for k in keys])
            sql = f"""
            CREATE TABLE {copy_destination} (LIKE {destination});
            {copy_statement}
            BEGIN;
            DELETE FROM {destination} USING {copy_destination} WHERE {where_statement};
            INSERT INTO {destination} SELECT * FROM {copy_destination};
            COMMIT
            """
        else:
            sql = copy_statement

        self.log.info('Executing COPY command...')
        postgres_hook.run(sql, self.autocommit)
        self.log.info("COPY command complete...")
Example #11
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)

        credentials = s3_hook.get_credentials()
        credentials_block = build_credentials_block(credentials)
        unload_options = '\n\t\t\t'.join(self.unload_options)

        unload_query = self._build_unload_query(credentials_block,
                                                self._select_query,
                                                self.s3_key, unload_options)

        self.log.info('Executing UNLOAD command...')
        postgres_hook.run(unload_query, self.autocommit)
        self.log.info("UNLOAD command complete...")
    def execute(self, context):
        self.log.info('Connecting to redshift!')
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        if self.truncate_table:
            self.log.info(f"Truncating table: {self.table}")
            redshift.run(f"""
                        TRUNCATE TABLE {self.table};
                    """)

        self.log.info('Loading dimension table into redshift')
        redshift.run(f"""
            INSERT INTO {self.table}
            {self.select_sql}
        """)
Example #13
File: postgres.py Project: mhenc/airflow
class PostgresOperator(BaseOperator):
    """
    Executes sql code in a specific Postgres database

    :param sql: the SQL code to be executed as a single string, or
        a list of str (sql statements), or a reference to a template file.
        Template references are recognized by str ending in '.sql'
    :param postgres_conn_id: The :ref:`postgres conn id <howto/connection:postgres>`
        reference to a specific postgres database.
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :param parameters: (optional) the parameters to render the SQL query with.
    :param database: name of the database which overrides the one defined in the connection
    """

    template_fields: Sequence[str] = ('sql', )
    # TODO: Remove renderer check when the provider has an Airflow 2.3+ requirement.
    template_fields_renderers = {
        'sql':
        'postgresql' if 'postgresql' in wwwutils.get_attr_renderer() else 'sql'
    }
    template_ext: Sequence[str] = ('.sql', )
    ui_color = '#ededed'

    def __init__(
        self,
        *,
        sql: Union[str, List[str]],
        postgres_conn_id: str = 'postgres_default',
        autocommit: bool = False,
        parameters: Optional[Union[Mapping, Iterable]] = None,
        database: Optional[str] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.database = database
        self.hook: Optional[PostgresHook] = None

    def execute(self, context: 'Context'):
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)
        self.hook.run(self.sql, self.autocommit, parameters=self.parameters)
        for output in self.hook.conn.notices:
            self.log.info(output)
Example #14
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)

        credentials = s3_hook.get_credentials()
        credentials_block = build_credentials_block(credentials)
        unload_options = '\n\t\t\t'.join(self.unload_options)
        s3_key = f"{self.s3_key}/{self.table}_" if self.table_as_file_name else self.s3_key
        select_query = f"SELECT * FROM {self.schema}.{self.table}"

        unload_query = self._build_unload_query(credentials_block,
                                                select_query, s3_key,
                                                unload_options)

        self.log.info('Executing UNLOAD command...')
        postgres_hook.run(unload_query, self.autocommit)
        self.log.info("UNLOAD command complete...")
    def execute(self, context):
        aws_hook = AwsBaseHook(self.aws_credentials_id, client_type="s3")
        credentials = aws_hook.get_credentials()
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        self.log.info("Clearing data from destination Redshift table")
        redshift.run("DELETE FROM {}".format(self.table))

        self.log.info("Copying data from S3 to Redshift")
        rendered_key = self.s3_key.format(**context)
        s3_path = "s3://{}/{}".format(self.s3_bucket, rendered_key)
        formatted_sql = StageToRedshiftOperator.copy_sql.format(
            self.table,
            s3_path,
            credentials.access_key,
            credentials.secret_key,
            self.json_format,
        )
        redshift.run(formatted_sql)
Example #16
    def execute(self, context):
        aws_hook = AwsBaseHook(self.aws_credentials_id)
        aws_credentials = aws_hook.get_credentials()
        redshift_conn = PostgresHook(
            postgres_conn_id=self.redshift_conn_id,
            connect_args={
                'keepalives': 1,
                'keepalives_idle': 60,
                'keepalives_interval': 60
            })

        self.log.debug(f"Truncate Table: {self.table}")
        redshift_conn.run(f"TRUNCATE TABLE {self.table}")

        # Avoid shadowing the built-in format().
        copy_format = ''
        if self.data_format == 'csv' and self.ignore_header > 0:
            copy_format += f"IGNOREHEADER {self.ignore_header}\n"

        if self.data_format == 'csv':
            copy_format += f"DELIMITER '{self.delimiter}'\n"
        elif self.data_format == 'json':
            copy_format += f"FORMAT AS JSON '{self.jsonpath}'\n"
        copy_format += f"{self.copy_opts}"
        self.log.debug(f"format : {copy_format}")

        formatted_key = self.s3_src_bucket_key.format(**context)
        self.log.info(f"Rendered S3 source file key : {formatted_key}")
        s3_url = f"s3://{self.s3_src_bucket_name}/{formatted_key}"
        self.log.debug(f"S3 URL : {s3_url}")
        formatted_sql = self._sql.format(
            table=self.table,
            source=s3_url,
            access_key=aws_credentials.access_key,
            secret_access_key=aws_credentials.secret_key,
            format=copy_format,
        )
        self.log.debug(f"Base SQL: {self._sql}")

        self.log.info(f"Copying data from S3 to Redshift table {self.table}...")
        redshift_conn.run(formatted_sql)
        self.log.info(f"Finished copying data from S3 to Redshift table {self.table}")
    def execute(self, context=None):
        """
        Format the sql statements with the params_sql statement.
        Execute one by one the different statements.
        Args:
            context:

        Returns:

        """
        if self.params_sql is not None:
            commands_formatted = [
                S.SQL(q).format(**self.params_sql)
                for q in self.commands_stripped
            ]
        else:
            commands_formatted = [S.SQL(q) for q in self.commands_stripped]
        hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        for qf in commands_formatted:
            self.log.info("Executing query: {}".format(
                qf.as_string(hook.get_conn())))
            hook.run((qf, ))
Example #18
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
        credentials = s3_hook.get_credentials()
        credentials_block = build_credentials_block(credentials)
        copy_options = '\n\t\t\t'.join(self.copy_options)

        copy_statement = self._build_copy_query(credentials_block, copy_options)

        if self.truncate_table:
            delete_statement = f'DELETE FROM {self.schema}.{self.table};'
            sql = f"""
            BEGIN;
            {delete_statement}
            {copy_statement}
            COMMIT
            """
        else:
            sql = copy_statement

        self.log.info('Executing COPY command...')
        postgres_hook.run(sql, self.autocommit)
        self.log.info("COPY command complete...")
Example #19
    def execute(self, context) -> None:
        postgres_hook = PostgresHook(postgres_conn_id=self.redshift_conn_id)
        conn = S3Hook.get_connection(conn_id=self.aws_conn_id)

        credentials_block = None
        if conn.extra_dejson.get('role_arn', False):
            credentials_block = f"aws_iam_role={conn.extra_dejson['role_arn']}"
        else:
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id, verify=self.verify)
            credentials = s3_hook.get_credentials()
            credentials_block = build_credentials_block(credentials)

        unload_options = '\n\t\t\t'.join(self.unload_options)

        unload_query = self._build_unload_query(credentials_block,
                                                self.select_query, self.s3_key,
                                                unload_options)

        self.log.info('Executing UNLOAD command...')
        postgres_hook.run(unload_query,
                          self.autocommit,
                          parameters=self.parameters)
        self.log.info("UNLOAD command complete...")
    def execute(self, context):
        """
        Description: This custom function fills a given fact table using the
                     passed SQL statement.

        Arguments:
            self: Instance of the class
            context: Context dictionary

        Returns:
            None
        """

        # Build connection
        postgres = PostgresHook(postgres_conn_id=self.postgres_conn_id)

        # Render the insert statement that fills the fact table
        formatted_sql = LoadFactOperator.insert_sql.format(
            self.table, self.insert_sql_query)
        postgres.run(formatted_sql)

        self.log.info(
            'LoadFactOperator for fact table {} completed'.format(
                self.table))
Example #21
    def execute(self, context):
        action = f"Redshift {len(self.query_list)} queries of {self.query_type} "
        self.log.info(f"Start {action}")
        self.log.info(self.redshift_conn_id)
        redshift = PostgresHook(postgres_conn_id=self.redshift_conn_id)

        for query in self.query_list:
            if self.query_type == 'insert':
                if not self.append_data:
                    self.log.info(f"Clearing data from destination Redshift table {query[1]}")
                    redshift.run("DELETE FROM {}".format(query[1]))
                
                self.log.info(f"Insert data into destination Redshift table {query[1]}")
                redshift.run(query[0])
            else:
                redshift.run(query)

        self.log.info(f"End {action}")
def drop_db():
    hook = PostgresHook()
    hook.run(DELETE_QUERY)
Example #23
class PostgresOperator(BaseOperator):
    """
    Executes sql code in a specific Postgres database

    :param sql: the SQL code to be executed as a single string, or
        a list of str (sql statements), or a reference to a template file.
        Template references are recognized by str ending in '.sql'
    :param postgres_conn_id: The :ref:`postgres conn id <howto/connection:postgres>`
        reference to a specific postgres database.
    :param autocommit: if True, each command is automatically committed.
        (default value: False)
    :param parameters: (optional) the parameters to render the SQL query with.
    :param database: name of the database which overrides the one defined in the connection
    """

    template_fields: Sequence[str] = ('sql', )
    # TODO: Remove renderer check when the provider has an Airflow 2.3+ requirement.
    template_fields_renderers = {
        'sql':
        'postgresql' if 'postgresql' in wwwutils.get_attr_renderer() else 'sql'
    }
    template_ext: Sequence[str] = ('.sql', )
    ui_color = '#ededed'

    def __init__(
        self,
        *,
        sql: Union[str, Iterable[str]],
        postgres_conn_id: str = 'postgres_default',
        autocommit: bool = False,
        parameters: Optional[Union[Iterable, Mapping]] = None,
        database: Optional[str] = None,
        runtime_parameters: Optional[Mapping] = None,
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.sql = sql
        self.postgres_conn_id = postgres_conn_id
        self.autocommit = autocommit
        self.parameters = parameters
        self.database = database
        self.runtime_parameters = runtime_parameters
        self.hook: Optional[PostgresHook] = None

    def execute(self, context: 'Context'):
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)
        if self.runtime_parameters:
            final_sql = []
            sql_param = {}
            for param in self.runtime_parameters:
                set_param_sql = f"SET {{}} TO %({param})s;"
                dynamic_sql = SQL(set_param_sql).format(Identifier(f"{param}"))
                final_sql.append(dynamic_sql)
            for param, val in self.runtime_parameters.items():
                sql_param.update({f"{param}": f"{val}"})
            if self.parameters:
                sql_param.update(self.parameters)
            if isinstance(self.sql, str):
                final_sql.append(SQL(self.sql))
            else:
                final_sql.extend(list(map(SQL, self.sql)))
            self.hook.run(final_sql, self.autocommit, parameters=sql_param)
        else:
            self.hook.run(self.sql,
                          self.autocommit,
                          parameters=self.parameters)
        for output in self.hook.conn.notices:
            self.log.info(output)
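A usage sketch for the runtime_parameters variant, assuming a 'postgres_default' connection; the session parameter and value are illustrative:

timeout_task = PostgresOperator(
    task_id='statement_timeout_demo',
    postgres_conn_id='postgres_default',
    runtime_parameters={'statement_timeout': '3000ms'},
    sql="SELECT 1;",
)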
Example #24
    def execute(self, context):
        """
        Description: This execution function gets flight data from the OpenSky
                     REST API hour by hour and writes this data to Postgres.

        Arguments:
            self: Instance of the class
            context: Context dictionary

        Returns:
            None
        """

        # Build connections
        postgres = PostgresHook(postgres_conn_id=self.postgres_conn_id)
        api_connection = BaseHook.get_connection(conn_id=self.api_conn_id)

        # Build correct timestamps in unix-format for passing to the api query
        self.log.info('api_query_date: {}'.format(
            self.api_query_date.format(**context)))
        start_time = datetime.fromisoformat(
            self.api_query_date.format(**context))
        end_time = start_time + timedelta(hours=1)
        start_timestamp = str(int(datetime.timestamp(start_time)))
        end_timestamp = str(int(datetime.timestamp(end_time)))

        # Build complete api path
        # 'https://{}:{}@opensky-network.org/api/flights/all?begin={}&end={}'
        complete_api_path = self.api_path.format(api_connection.login,
                                                 api_connection.password,
                                                 start_timestamp,
                                                 end_timestamp)
        self.log.info('api_path: {}'.format(complete_api_path))

        # If parameter truncate_table is true, then truncate given table
        if self.truncate_table:
            self.log.info('Truncate data from staging table {}'.format(
                self.table))
            trunc_formatted_sql = APItoPostgresOperator.truncate_sql.format(
                self.table)
            postgres.run(trunc_formatted_sql)

        # Get data from api
        try:
            response = requests.get(complete_api_path)
            data = response.json()
        except Exception as ex:
            self.log.info('API request error - message: {}'.format(ex))
            # response and data are undefined if the request failed, so stop.
            return

        # If response is OK and length of data > 0 then write data to the database
        if response.status_code == 200:
            if len(data) > 0:
                for element in data:
                    formatted_sql = APItoPostgresOperator.insert_sql.format(
                        self.table, element['icao24'], element['firstSeen'],
                        element['estDepartureAirport'], element['lastSeen'],
                        element['estArrivalAirport'], element['callsign'])
                    postgres.run(formatted_sql)
            else:
                self.log.info(
                    "API request doesn't contain data - datetime:{}".format(
                        start_time))

        else:
            self.log.info(
                'API request problem - datetime:{} - response_code:{}'.format(
                    start_time, response.status_code))

        self.log.info(
            'APItoPostgresOperator for {} completed - datetime: {}'.format(
                self.table, start_time))
Example #25
class S3ToRedshiftOperator(BaseOperator):
    """
    Executes a COPY command to load files from S3 to Redshift

    :param schema: reference to a specific schema in redshift database
    :type schema: str
    :param table: reference to a specific table in redshift database
    :type table: str
    :param s3_bucket: reference to a specific S3 bucket
    :type s3_bucket: str
    :param s3_key: reference to a specific S3 key
    :type s3_key: str
    :param redshift_conn_id: reference to a specific redshift database
    :type redshift_conn_id: str
    :param aws_conn_id: reference to a specific S3 connection
    :type aws_conn_id: str
    :param verify: Whether or not to verify SSL certificates for S3 connection.
        By default SSL certificates are verified.
        You can provide the following values:

        - ``False``: do not validate SSL certificates. SSL will still be used
                 (unless use_ssl is False), but SSL certificates will not be
                 verified.
        - ``path/to/cert/bundle.pem``: A filename of the CA cert bundle to use.
                 You can specify this argument if you want to use a different
                 CA cert bundle than the one used by botocore.
    :type verify: bool or str
    :param copy_options: reference to a list of COPY options
    :type copy_options: list
    """

    template_fields = ()
    template_ext = ()
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 schema: str,
                 table: str,
                 s3_bucket: str,
                 s3_key: str,
                 redshift_conn_id: str = 'redshift_default',
                 aws_conn_id: str = 'aws_default',
                 verify: Optional[Union[bool, str]] = None,
                 copy_options: Optional[List] = None,
                 autocommit: bool = False,
                 *args,
                 **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.schema = schema
        self.table = table
        self.s3_bucket = s3_bucket
        self.s3_key = s3_key
        self.redshift_conn_id = redshift_conn_id
        self.aws_conn_id = aws_conn_id
        self.verify = verify
        self.copy_options = copy_options or []
        self.autocommit = autocommit
        self._s3_hook = None
        self._postgres_hook = None

    def execute(self, context):
        self._postgres_hook = PostgresHook(
            postgres_conn_id=self.redshift_conn_id)
        self._s3_hook = S3Hook(aws_conn_id=self.aws_conn_id,
                               verify=self.verify)
        credentials = self._s3_hook.get_credentials()
        copy_options = '\n\t\t\t'.join(self.copy_options)

        copy_query = """
            COPY {schema}.{table}
            FROM 's3://{s3_bucket}/{s3_key}/{table}'
            with credentials
            'aws_access_key_id={access_key};aws_secret_access_key={secret_key}'
            {copy_options};
        """.format(schema=self.schema,
                   table=self.table,
                   s3_bucket=self.s3_bucket,
                   s3_key=self.s3_key,
                   access_key=credentials.access_key,
                   secret_key=credentials.secret_key,
                   copy_options=copy_options)

        self.log.info('Executing COPY command...')
        self._postgres_hook.run(copy_query, self.autocommit)
        self.log.info("COPY command complete...")