from pyspark.sql.functions import col
from constants import dict_dbs_locations, dict_dbs_names


def load_l_airport(spark, integration_layer_loc, landing_zone_name):
    """Upsert the L_AIRPORT lookup table from the landing zone into the
    integration-layer Delta table.

    Rows are matched on CODE: matched rows get DESCRIPTION refreshed,
    unmatched CODEs are inserted.

    :param spark: active SparkSession with the landing-zone db registered.
    :param integration_layer_loc: base path of the integration layer; the
        Delta table lives at ``<integration_layer_loc>/L_AIRPORT``.
    :param landing_zone_name: name of the landing-zone database in spark sql.
    """
    # Local import so the function is usable when this module is imported:
    # the top-level `from delta.tables import *` only runs under __main__.
    from delta.tables import DeltaTable

    delta_l_airport = DeltaTable.forPath(spark,
                                         integration_layer_loc + '/L_AIRPORT')

    # Source rows from the landing zone.
    df_LZ_l_airport = spark.sql(f"""
        SELECT 
        CODE
        ,DESCRIPTION
        FROM {landing_zone_name}.L_AIRPORT
    """)

    # Merge (upsert) into the Delta table keyed on CODE.
    delta_l_airport.alias("oldData") \
        .merge(df_LZ_l_airport.alias("newData"), "oldData.CODE = newData.CODE") \
        .whenMatchedUpdate(set={"DESCRIPTION": col("newData.DESCRIPTION")}) \
        .whenNotMatchedInsert(values={"CODE": col("newData.CODE"), "DESCRIPTION": col("newData.DESCRIPTION")}) \
        .execute()


if __name__ == '__main__':
    spark = initialize_spark_session('load_l_airport')
    from delta.tables import *

    # Resolve the environment-specific location/name, then run the load.
    il_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')
    lz_name = dict_dbs_names.get('LANDING_ZONE_NAME')

    load_l_airport(spark, il_loc, lz_name)
from helper_functions.initialize_spark_session import initialize_spark_session
import os
from pyspark.sql.functions import col

# Root-logger configuration for this script's INFO/ERROR messages.
# NOTE(review): `logging` is not imported in the lines directly above this
# fragment — presumably imported elsewhere in the full file; verify.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_presentation_layer')

    from delta.tables import *

    # Creating the presentation_layer database in spark sql
    try:

        db_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('PRESENTATION_LAYER_LOC')

        # Render the CREATE DATABASE ddl with the env-specific name/location.
        spark.sql(
            ddl_create_presentation_layer_db.format(
                presentation_layer_db_name=db_name,
                presentation_layer_db_loc=db_loc))

        spark.sql(f'USE {db_name}')

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')
        # NOTE(review): everything below is dead code — the logging.info is
        # unreachable after the raise, and the second `except` clause on the
        # same try can never fire (the first already catches Exception).
        # This looks like a truncated merge of two scripts: the try-body that
        # loads CITY_DEMOGRAPHICS is missing. Confirm against the original
        # source files and restore or remove.
        logging.info(
            'CITY_DEMOGRAPHICS has been loaded in the Presentation layer')

    except Exception as e:
        logging.error(
            'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer')
        spark.stop()
        raise Exception(
            f'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer,{e}')


if __name__ == '__main__':
    spark = initialize_spark_session('load_pl_city_demographics')
    from delta.tables import *

    # Resolve env-specific db names/locations up front; abort on failure.
    try:
        pl_loc = dict_dbs_locations.get(
            'PRESENTATION_LAYER_LOC')
        pl_name = dict_dbs_names.get('PRESENTATION_LAYER_NAME')
        il_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
    except Exception as e:
        logging.error('Failed to retrieve Environment variables')
        spark.stop()
        raise Exception(
            f'Failed to load CITY_DEMOGRAPHICS in the Presentation Layer,{e}')

    load_pl_city_demographics(spark, pl_name, pl_loc, il_name)
import logging
from sql_queries.landing_zone_ddl import ddl_create_land_zone_db, dict_landing_zone_ddls
from constants import dict_dbs_locations, dict_dbs_names
from helper_functions.initialize_spark_session import initialize_spark_session

# Root-logger configuration for this script's INFO/ERROR messages.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':

    spark = initialize_spark_session('create_landing_zone')

    # Creating the landing_zone database in spark sql
    try:

        db_name = dict_dbs_names.get('LANDING_ZONE_NAME')
        db_loc = dict_dbs_locations.get('LANDING_ZONE_LOC')

        # Render the CREATE DATABASE ddl with the env-specific name/location.
        spark.sql(
            ddl_create_land_zone_db.format(landing_zone_db_name=db_name,
                                           landing_zone_db_loc=db_loc))

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')

    # creating landing zone tables
    try:
    # NOTE(review): corrupted fragment boundary — the original lines here were
    # non-code extraction residue ("示例#5", "0"). The body of the `try:` above
    # (landing-zone table creation) is missing; restore from the original file.
                "STATE_ABR": col("newData.STATE_ABR"),
                "STATE_FIPS": col("newData.STATE_FIPS"),
                "STATE_NAME": col("newData.STATE_NAME"),
                "WAC_CODE": col("newData.WAC_CODE")
            }) \
            .execute()

        logging.info('STATE has been loaded in the Presentation layer')

    except Exception as e:
        logging.error('Failed to load STATE in the Presentation Layer')
        spark.stop()
        raise Exception(f'Failed to load STATE in the Presentation Layer,{e}')


if __name__ == '__main__':
    spark = initialize_spark_session('load_pl_state')
    from delta.tables import *

    # Resolve env-specific location/name; fail fast if unavailable.
    try:
        pl_loc = dict_dbs_locations.get(
            'PRESENTATION_LAYER_LOC')
        il_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
    except Exception as e:
        logging.error('Failed to retrieve Environment variables')
        spark.stop()
        raise Exception(f'Failed to load STATE in the Presentation Layer,{e}')

    load_pl_state(spark, pl_loc, il_name)
from helper_functions.initialize_spark_session import initialize_spark_session
import os
from pyspark.sql.functions import col

# Root-logger configuration for this script's INFO/ERROR messages.
# NOTE(review): `logging` is not imported in the lines directly above this
# fragment — presumably imported elsewhere in the full file; verify.
logging.basicConfig(level=logging.INFO,
                    format="%(asctime)s: %(levelname)s: %(message)s ")

if __name__ == '__main__':
    spark = initialize_spark_session('create_integration_layer')

    from delta.tables import *

    # Creating the integration_layer database in spark sql
    try:

        db_name = dict_dbs_names.get('INTEGRATION_LAYER_NAME')
        db_loc = dict_dbs_locations.get('INTEGRATION_LAYER_LOC')

        # Render the CREATE DATABASE ddl with the env-specific name/location.
        spark.sql(
            ddl_create_integration_layer_db.format(
                integration_layer_db_name=db_name,
                integration_layer_db_loc=db_loc))

        spark.sql(f'USE {db_name}')

        logging.info(f'{db_name} has been created.')

    except Exception as e:
        logging.error(f'Failed to create the {db_name} db in spark sql,{e}')
        spark.stop()
        raise Exception(f'Failed to create the {db_name}, {e}')