def put_tags_jokes(conn_id_extract, conn_id_load):
    """Fill in the ``tags`` column for jokes that do not have one yet.

    Aggregates tag names (comma-joined) per joke from the extract database,
    then writes them into ``jokes_to_send.tags`` on the load database.

    :param conn_id_extract: Airflow connection id of the extract Postgres DB.
    :param conn_id_load: Airflow connection id of the load Postgres DB.
    """
    logger = logging.getLogger(__name__)

    # Only jokes whose tags column is still NULL need updating.
    sql_extract = """
        select jt.joke_id as id, string_agg(t.name, ',') as tags
        from joke_tags jt
        left join tags t on t.id = jt.tag_id
        where jt.joke_id in (select jts.id from jokes_to_send jts where jts.tags is null)
        group by jt.joke_id;
    """

    conn_extract = PostgresHook(postgres_conn_id=conn_id_extract, schema="jokes-app")
    df_joke_tags = conn_extract.get_pandas_df(sql=sql_extract)
    # was logging.info(...) on the root logger; use the module logger
    # consistently, with lazy %-style args
    logger.info("n Jokes to update tags: %s", len(df_joke_tags))

    if not df_joke_tags.empty:
        conn_load = PostgresUpdateApiHook(postgres_conn_id=conn_id_load, schema="jokes-app")
        conn_load.update_column(
            table="jokes_to_send",
            column_name="tags",
            df=df_joke_tags,
            id_column_name="id",
        )
        logger.info("Done updating tags for '%s' jokes", len(df_joke_tags))
    else:
        logger.info("No new tags for jokes")
def check_validated_jokes(conn_id_extract, conn_id_load):
    """Move human-validated jokes into ``jokes_to_send`` and soft-delete them.

    Reads non-deleted rows flagged ``is_joke`` from ``validate_jokes`` on the
    extract DB, inserts them into ``jokes_to_send`` on the load DB, then marks
    the source rows with ``deleted_at = NOW()``.

    :param conn_id_extract: Airflow connection id of the extract Postgres DB.
    :param conn_id_load: Airflow connection id of the load Postgres DB.
    """
    logger = logging.getLogger(__name__)

    sql_get_jokes = "select joke, created_at from validate_jokes where deleted_at is null and is_joke is true"
    logger.info(f"Running postgres query: '{sql_get_jokes}'")

    conn_extract = PostgresHook(postgres_conn_id=conn_id_extract, schema="jokes-app")
    df_validated_jokes = conn_extract.get_pandas_df(sql=sql_get_jokes)
    # was logging.info(...) on the root logger; use the module logger consistently
    logger.info("n Validated jokes: %s ", len(df_validated_jokes))

    if not df_validated_jokes.empty:
        # load validated jokes to Load connection - jokes_to_send
        conn_load = PostgresHook(postgres_conn_id=conn_id_load, schema="jokes-app")
        conn_load.insert_rows(
            table="jokes_to_send",
            rows=df_validated_jokes.values.tolist(),
            target_fields=list(df_validated_jokes.keys()),
        )
        # put soft-delete in validate_jokes.
        # NOTE(review): this re-evaluates the WHERE clause, so rows validated
        # between the SELECT above and this UPDATE are soft-deleted without
        # having been copied — consider deleting by explicit id list instead.
        sql_update_query = "update validate_jokes set deleted_at = NOW() where deleted_at is null and is_joke is true"
        logger.info(f"Running postgres query: '{sql_update_query}'")
        conn_extract.run(sql=sql_update_query)
        logger.info(f"Updated '{len(df_validated_jokes)}' jokes. New jokes in Jokes DB")
    else:
        logger.info("No new validated jokes to put into the DB")
def write_api_table(data, schema, table):
    """Write a temporary ``<table>_api`` staging table to the warehouse.

    Drops any existing ``<schema>.<table>_api`` table, then writes *data*
    into a fresh one via pandas. The staging table is later used to execute
    merge SQL and is dropped by ``drop_api_table``.

    :param data: pandas DataFrame to stage.
    :param schema: target schema name (trusted, internally supplied).
    :param table: base table name; ``_api`` is appended for the staging copy.
    """
    # One hook is enough: reuse it for both the raw DBAPI connection and the
    # SQLAlchemy engine (the original built two identical hooks).
    sql_hook = PostgresHook(postgres_conn_id='warehouse')
    warehouse = sql_hook.get_conn()
    try:
        cursor = warehouse.cursor()
        try:
            # schema/table come from internal config, not user input; still,
            # they are interpolated — keep them trusted.
            cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
                schema, table))
            warehouse.commit()
        finally:
            cursor.close()
        data.to_sql(schema=schema,
                    name=table + '_api',
                    index=False,
                    con=sql_hook.get_sqlalchemy_engine())
    finally:
        # Original leaked the connection if any statement raised.
        warehouse.close()
def check_table_exist(sql_to_get_schema, sql_to_check_table_exist, table_name):
    """Return ``True`` if *table_name* exists in the database.

    First runs *sql_to_get_schema* to list schema/table records and picks the
    schema of the row that mentions *table_name*, then formats that schema and
    table into *sql_to_check_table_exist* and checks for a result row.

    :param sql_to_get_schema: query returning (schema, ...) records.
    :param sql_to_check_table_exist: query template with two ``{}`` slots
        (schema, table_name). NOTE(review): string-formatted SQL — safe only
        because both values come from trusted DAG config.
    :param table_name: name of the table to look for.
    :return: bool — whether the existence query returned a row.
    """
    hook = PostgresHook('airflow_course_postgres')

    # get schema name
    query = hook.get_records(sql=sql_to_get_schema)
    schema = None
    for result in query:
        if table_name in result:
            schema = result[0]
            print(schema)
            break

    # check table exist
    query = hook.get_first(
        sql=sql_to_check_table_exist.format(schema, table_name))
    print(query)
    # bool() of the row is the idiomatic form of "True if query else False"
    return bool(query)
def drop_api_table(schema, table):
    """Drop the ``<schema>.<table>_api`` staging table if it exists.

    Counterpart of ``write_api_table``: removes the temporary table that held
    the updated API data after it has been merged.

    :param schema: target schema name (trusted, internally supplied).
    :param table: base table name; ``_api`` is appended for the staging copy.
    """
    warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()
    try:
        cursor = warehouse.cursor()
        try:
            cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
                schema, table))
            warehouse.commit()
        finally:
            cursor.close()
    finally:
        # Original leaked the connection if execute/commit raised.
        warehouse.close()
def execute_sql_statement(sql):
    """Execute *sql* against the warehouse connection and commit.

    Used for the insert/update/expire merge statements that read from the
    staging ``_api`` tables.

    :param sql: complete SQL statement (trusted, internally built).
    """
    warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()
    try:
        cursor = warehouse.cursor()
        try:
            cursor.execute(sql)
            warehouse.commit()
        finally:
            cursor.close()
    finally:
        # Original leaked the connection if execute/commit raised.
        warehouse.close()
import textwrap
import pandas
import psycopg2 as psql
from airflow.operators.postgres_operator import PostgresOperator, PostgresHook
from airflow.hooks.base_hook import BaseHook

# NOTE(review): hard-coded API key committed to source — should be moved to an
# Airflow connection/variable or environment secret.
headers = {'api_key': 'a2d3eedb7b804592b09ed95ac11ffc9f'}
# NOTE(review): opening a DB connection at import time is a module-level side
# effect; every DAG-file parse pays for it. Consider creating these lazily.
sql_hook = PostgresHook(postgres_conn_id='warehouse')
warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()


def return_data(response, key):
    '''
    Convert the JSON payload of *response* under *key* into a flat
    pandas DataFrame (via json_normalize).
    '''
    return pandas.io.json.json_normalize(response.json()[key])


# insert records into warehouse table
def write_api_table(data, schema, table):
    '''
    Write a temporary `<schema>.<table>_api` staging table to the database,
    so merge SQL can be executed against it (via cursor.execute(sql));
    the temporary table is dropped afterwards.

    NOTE(review): this definition appears truncated at the end of the visible
    chunk (it stops after the DROP/commit, before the data is written) —
    confirm against the full file.
    '''
    sql_hook = PostgresHook(postgres_conn_id='warehouse')
    # NOTE(review): builds a second identical hook for the raw connection —
    # the one above could be reused.
    warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()
    cursor = warehouse.cursor()
    # schema/table are interpolated into SQL — assumed trusted, internal values
    cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
        schema, table))
    warehouse.commit()