Example #1
def put_tags_jokes(conn_id_extract, conn_id_load):
    logger = logging.getLogger(__name__)
    sql_extract = """
    select
        jt.joke_id as id,
        string_agg(t.name, ',') as tags
    from
        joke_tags jt
    left join
        tags t
    on
        t.id = jt.tag_id
    where
        jt.joke_id in (select jts.id from jokes_to_send jts where jts.tags is null)
    group by
        jt.joke_id;
    """

    conn_extract = PostgresHook(postgres_conn_id=conn_id_extract, schema="jokes-app")
    df_joke_tags = conn_extract.get_pandas_df(sql=sql_extract)

    logger.info(f"Jokes to update tags: {len(df_joke_tags)}")
    if not df_joke_tags.empty:
        conn_load = PostgresUpdateApiHook(postgres_conn_id=conn_id_load, schema="jokes-app")

        conn_load.update_column(table="jokes_to_send", column_name="tags", df=df_joke_tags, id_column_name="id")
        logger.info(f"Done updating tags for '{len(df_joke_tags)}' jokes")
    else:
        logger.info("No new tags for jokes")
Example #2
def check_validated_jokes(conn_id_extract, conn_id_load):
    logger = logging.getLogger(__name__)

    sql_get_jokes = "select joke, created_at from validate_jokes where deleted_at is null and is_joke is true"
    logger.info(f"Running postgres query: '{sql_get_jokes}'")
    conn_extract = PostgresHook(postgres_conn_id=conn_id_extract, schema="jokes-app")
    df_validated_jokes = conn_extract.get_pandas_df(sql=sql_get_jokes)

    logger.info(f"Validated jokes: {len(df_validated_jokes)}")
    if not df_validated_jokes.empty:

        # load validated jokes to Load connection - jokes_to_send
        conn_load = PostgresHook(postgres_conn_id=conn_id_load, schema="jokes-app")
        conn_load.insert_rows(
            table="jokes_to_send",
            rows=df_validated_jokes.values.tolist(),
            target_fields=list(df_validated_jokes.columns),
        )

        # put soft-delete in validate_jokes
        sql_update_query = "update validate_jokes set deleted_at = NOW() where deleted_at is null and is_joke is true"
        logger.info(f"Running postgres query: '{sql_update_query}'")
        conn_extract.run(sql=sql_update_query)

        logger.info(f"Updated '{len(df_validated_jokes)}' jokes. New jokes in Jokes DB")

    else:
        logger.info("No new validated jokes to put into the DB")
Example #3
def write_api_table(data, schema, table):
    '''
    Write a temporary table to the database; the temporary table is then used to execute
    SQL statements (via cursor.execute(sql)) and finally dropped.
    '''
    sql_hook = PostgresHook(postgres_conn_id='warehouse')
    warehouse = sql_hook.get_conn()
    cursor = warehouse.cursor()
    cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
        schema, table))
    warehouse.commit()
    data.to_sql(schema=schema,
                name=table + '_api',
                index=False,
                con=sql_hook.get_sqlalchemy_engine())
    warehouse.close()
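# Hedged usage sketch: stage a DataFrame into <schema>.<table>_api so that merge SQL can be
# run against it afterwards. The schema, table and DataFrame contents are assumptions.
import pandas

rail_df = pandas.DataFrame({"train_id": [100], "circuit_id": [1234]})
write_api_table(rail_df, schema="wmata", table="rail_positions")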
Example #4
def check_table_exist(sql_to_get_schema, sql_to_check_table_exist, table_name):
    """ callable function to get schema name and after that check if table exist """
    hook = PostgresHook('airflow_course_postgres')
    # get schema name
    query = hook.get_records(sql=sql_to_get_schema)
    schema = None
    for result in query:
        if table_name in result:
            schema = result[0]
            print(schema)
            break

    # check table exist
    query = hook.get_first(
        sql=sql_to_check_table_exist.format(schema, table_name))
    print(query)
    return bool(query)
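# The two SQL arguments are not shown in the snippet; a plausible pair based on
# information_schema (the queries and table name below are illustrative assumptions):
sql_to_get_schema = "SELECT table_schema, table_name FROM information_schema.tables;"
sql_to_check_table_exist = (
    "SELECT 1 FROM information_schema.tables "
    "WHERE table_schema = '{}' AND table_name = '{}';"
)
table_exists = check_table_exist(sql_to_get_schema, sql_to_check_table_exist, "rail_positions")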
Example #5
def drop_api_table(schema, table):
    '''
    drop the api table that contains the updated wmata information
    '''
    warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()
    cursor = warehouse.cursor()
    cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
        schema, table))
    warehouse.commit()
    warehouse.close()
Example #6
def execute_sql_statement(sql):
    '''
    execute the sql queries to insert new records, update existing information,
    and mark expired records
    '''
    warehouse = PostgresHook(postgres_conn_id='warehouse').get_conn()
    cursor = warehouse.cursor()
    cursor.execute(sql)
    warehouse.commit()
    warehouse.close()
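# A hedged sketch of how the staging flow might be completed after write_api_table: merge the
# _api staging table into the warehouse table, then drop it. The schema, table, key column and
# merge SQL are illustrative assumptions, not the project's actual queries.
merge_sql = """
    INSERT INTO wmata.rail_positions (train_id, circuit_id)
    SELECT train_id, circuit_id FROM wmata.rail_positions_api
    ON CONFLICT (train_id) DO UPDATE SET circuit_id = EXCLUDED.circuit_id;
"""
execute_sql_statement(merge_sql)
drop_api_table(schema="wmata", table="rail_positions")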
Example #7
import textwrap
import pandas
import psycopg2 as psql
from airflow.operators.postgres_operator import PostgresOperator
from airflow.hooks.postgres_hook import PostgresHook
from airflow.hooks.base_hook import BaseHook

headers = {'api_key': 'a2d3eedb7b804592b09ed95ac11ffc9f'}

sql_hook = PostgresHook(postgres_conn_id='warehouse')
warehouse = sql_hook.get_conn()


def return_data(response, key):
    '''
    convert JSON format to pandas data frame
    '''
    return pandas.io.json.json_normalize(response.json()[key])
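
# Illustrative call: fetch WMATA train positions and normalize the JSON payload into a
# DataFrame. The endpoint URL and JSON key are assumptions, not taken from the snippet.
import requests

response = requests.get(
    "https://api.wmata.com/TrainPositions/TrainPositions?contentType=json",
    headers=headers,
)
train_positions = return_data(response, "TrainPositions")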


# insert records into warehouse table
def write_api_table(data, schema, table):
    '''
    Write a temporary table to the database; the temporary table is then used to execute
    SQL statements (via cursor.execute(sql)) and finally dropped.
    '''
    sql_hook = PostgresHook(postgres_conn_id='warehouse')
    warehouse = sql_hook.get_conn()
    cursor = warehouse.cursor()
    cursor.execute('''DROP TABLE IF EXISTS {0}.{1}_api;'''.format(
        schema, table))
    warehouse.commit()