def execute(self, context):
    """Drop every table in ``self.db_schema`` whose name contains ``self.key``.

    If ``self.dag_dependencies`` is truthy, ``self.check_for_dependencies()``
    is called first. The table names of the schema are then read from
    ``information_schema.tables`` through a ``PostgresHook`` built from
    ``self.db_conn_id``, and one ``DROP TABLE`` statement is run for each
    matching table.

    Args:
        context: Task context supplied by the caller; not used here.
    """
    if self.dag_dependencies:
        self.check_for_dependencies()

    # The schema is passed as a bound parameter rather than being
    # formatted into the statement text.
    tables_sql = """
        SELECT table_name
        FROM information_schema.tables
        WHERE table_schema = %s
        """

    hook = PostgresHook(self.db_conn_id)
    rows = hook.get_records(tables_sql, parameters=(self.db_schema,))
    table_names = [row[0] for row in rows]

    for table_name in table_names:
        # Substring match: any table whose name contains the key is dropped.
        if self.key not in table_name:
            continue

        # Log the table that is actually dropped, not just the search key.
        logging.info('Dropping: %s.%s', self.db_schema, table_name)

        # Identifiers cannot be sent as bound parameters, so the DROP
        # statement is still assembled with str.format.
        drop_sql = """
            DROP TABLE {0}.{1}
            """.format(self.db_schema, table_name)

        hook.run(drop_sql)
示例#2
0
def process_customers_order_dim(**kwargs):
    """Fetch the distinct (customer name, order id) pairs and return them.

    The connection id is read from ``kwargs['conn_id']`` (``None`` when the
    key is absent) and handed to ``PostgresHook``. The fetched rows are
    printed before being returned.

    Returns:
        Whatever ``PostgresHook.get_records`` returns for the query.
    """
    hook = PostgresHook(kwargs.get('conn_id'))

    # Join of customer and order_info on customer_id.
    rows = hook.get_records(
        "select distinct customer.cust_name, order_info.order_id from customer, order_info where customer.customer_id=order_info.customer_id;"
    )
    print(rows)
    return rows
示例#3
0
def cache_latest_stocks(ds, **kwargs):
    """Copy the most recent price of every stock symbol into Redis.

    The query keeps one row per symbol, ordered by ``valid_until``
    descending, and each ``(symbol, price)`` pair is written to Redis with
    the symbol as key.

    Args:
        ds: Not used by this function.
        **kwargs: Not used by this function.
    """
    query = """SELECT DISTINCT ON (symbol)
                             symbol, price
                      FROM   stocks
                      ORDER  BY symbol, valid_until DESC;"""

    cache = redis.StrictRedis(host='redis')
    rows = PostgresHook(postgres_conn_id='stocks').get_records(query)

    for symbol, price in rows:
        cache.set(symbol, price)
示例#4
0
def cache_latest_rates(ds, **kwargs):
    """Store the newest rate of each pair in Redis, keyed by the pair.

    Rows come from the ``rates`` table through the ``rates`` Postgres
    connection; the query keeps a single row per pair, sorted by
    ``valid_until`` descending.

    Args:
        ds: Not used by this function.
        **kwargs: Not used by this function.
    """
    redis_client = redis.StrictRedis(host='redis')
    rates_hook = PostgresHook(postgres_conn_id='rates')

    newest_per_pair = rates_hook.get_records(
        """SELECT DISTINCT ON (pair)
                             pair, rate
                      FROM   rates
                      ORDER  BY pair, valid_until DESC;"""
    )

    for row in newest_per_pair:
        # Each row must be exactly (pair, rate).
        pair, rate = row
        redis_client.set(pair, rate)
示例#5
0
def pull_and_insert(**kwargs):
    """Run the templated query on ``snql`` and load the rows into staging.

    The SQL text is taken from ``kwargs['templates_dict']['query']``. Its
    result set is inserted into the ``dim_sneakers`` table through the
    ``snql_staging`` connection.

    Raises:
        KeyError: If ``templates_dict`` or its ``query`` entry is missing.
    """
    sql_text = kwargs['templates_dict']['query']

    # Source side: fetch the rows produced by the templated query.
    source_rows = PostgresHook('snql').get_records(sql_text)

    # Staging side: write the fetched rows into the staging table.
    PostgresHook('snql_staging').insert_rows('dim_sneakers', source_rows)
示例#6
0
def populate_target(**kwargs):
    """Replace the contents of ``dim_sneakers`` on ``snql`` with staging data.

    The rows are read from the ``snql_staging`` connection with the query in
    ``kwargs['templates_dict']['query']``. Every row of ``dim_sneakers`` on
    the ``snql`` connection is then deleted and the staging rows are
    inserted in its place.

    Raises:
        KeyError: If ``templates_dict`` or its ``query`` entry is missing.
    """
    staging = PostgresHook('snql_staging')
    fresh_rows = staging.get_records(kwargs['templates_dict']['query'])

    target = PostgresHook('snql')

    # NOTE(review): the delete and the insert are two separate hook calls;
    # whether a failed insert leaves the table empty depends on the hook's
    # transaction handling -- confirm.
    target.run("DELETE FROM dim_sneakers")
    target.insert_rows('dim_sneakers', fresh_rows)
class PostgresOperator(BaseOperator):
    """Execute SQL on Postgres, then check that ``table`` contains rows.

    Args:
        sql: Statement(s) to execute; templated, and ``.sql`` files are
            accepted as templates.
        table: Table whose row count is verified after ``sql`` has run.
        postgres_conn_id: Connection id handed to ``PostgresHook``.
        database: Passed to ``PostgresHook`` as ``schema``.
        autocommit: Forwarded to ``PostgresHook.run``.
    """

    template_fields = ('sql', )
    template_ext = ('.sql', )
    ui_color = '#ededed'

    @apply_defaults
    def __init__(self,
                 sql,
                 table="public.new",
                 postgres_conn_id='postgres_default',
                 database=None,
                 autocommit=False,
                 *args,
                 **kwargs):
        super(PostgresOperator, self).__init__(*args, **kwargs)

        self.sql = sql
        self.table = table
        self.postgres_conn_id = postgres_conn_id
        self.database = database
        self.autocommit = autocommit

    def execute(self, context):
        """Run ``self.sql`` once and fail if ``self.table`` ends up empty.

        Args:
            context: Task context supplied by the caller; not used here.

        Raises:
            ValueError: If the count query returns no result or the table
                holds fewer than one row.
        """
        self.log.info('Executing: %s', self.sql)
        self.hook = PostgresHook(postgres_conn_id=self.postgres_conn_id,
                                 schema=self.database)

        # The statement is executed exactly once; running it a second time
        # would apply non-idempotent SQL (e.g. INSERT) twice.
        self.hook.run(self.sql, self.autocommit)
        for output in self.hook.conn.notices:
            self.log.info(output)

        # Reuse the same hook so the check targets the database the
        # statement was executed against.
        records = self.hook.get_records(f"SELECT COUNT(*) FROM {self.table}")
        if not records or not records[0]:
            raise ValueError(
                f"Data quality check failed. {self.table} returned no results")

        num_records = records[0][0]
        if num_records < 1:
            raise ValueError(
                f"Data quality check failed. {self.table} contained 0 rows")

        self.log.info(
            'Data quality on table %s check passed with %s records',
            self.table, num_records)
示例#8
0
def redshift_call(sql, type_='run'):
    """Send ``sql`` to the ``naveen_redshift`` connection.

    Args:
        sql: SQL handed unchanged to the hook.
        type_: ``'run'`` executes the statement with ``PostgresHook.run``;
            any other value fetches rows with ``PostgresHook.get_records``.

    Returns:
        The return value of whichever hook method was called.
    """
    hook = PostgresHook(postgres_conn_id="naveen_redshift")
    action = hook.run if type_ == 'run' else hook.get_records
    return action(sql)
示例#9
0
def postgres_call(sql):
    """Return the rows ``sql`` yields on the ``naveen_ngrok_postgres`` connection."""
    hook = PostgresHook(postgres_conn_id="naveen_ngrok_postgres")
    rows = hook.get_records(sql)
    return rows