def main():
    """Run the report job.

    Loads the postgres section of the shared config, starts job tracking,
    and generates a report for a fixed set of record UUIDs.
    """
    # Read the postgres section of the shared config file.
    reader = ConfigReader("config.cfg", "postgres")
    db_config = reader.get_config()

    # NOTE(review): `tracker` is never used after construction in this
    # function — presumably Tracker.__init__ registers the job run as a
    # side effect; confirm against the Tracker implementation.
    tracker = Tracker("Test job", db_config)

    # Report on a hard-coded set of record UUIDs.
    reporter = Reporter(db_config, [
        "b0ca6902-8d7b-49a2-9a17-b94e42e839fc",
        "c3a2320c-19f1-4051-aa5f-85fc58d39ac9",
        "e74252e6-558c-49c1-aa13-99be2cc59585",
    ])
    reporter.get_report()
# --- Spark / Azure Blob Storage setup --------------------------------------
# Configures the active Spark session for reading an Azure storage account
# via wasbs:// paths, and creates a BlobServiceClient for direct blob access.

# Third-party imports (grouped; azure import was previously mid-script).
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, DateType, \
    StringType, TimestampType, DecimalType, IntegerType
from azure.storage.blob import BlobServiceClient

# Local imports.
from configreader import ConfigReader
from parsers import parse_line

spark = SparkSession.builder.getOrCreate()
# Ship parsers.py to the executors so its functions are importable on workers.
spark.sparkContext.addPyFile("parsers.py")

# Read the azure-storage section of the shared config file.
reader = ConfigReader("config.cfg", "azure-storage")
config = reader.get_config()

# Get Azure storage info from config
storage_acct_name = config["account_name"]
storage_acct_access_key = config["access_key"]
storage_container = config["container_name"]
mount_root = config["mount_root"]

# Set Spark Azure storage account and key
storage_acct_key_str = (
    f"fs.azure.account.key.{storage_acct_name}.blob.core.windows.net"
)
spark.conf.set(storage_acct_key_str, storage_acct_access_key)

# Set base Spark filepath for container
container_base_path = (
    f"wasbs://{storage_container}@{storage_acct_name}.blob.core.windows.net"
)
mount_base_path = f"{mount_root}/{storage_container}"

# Set up container client for direct (non-Spark) blob operations.
blob_service_client = BlobServiceClient(
    account_url=f"https://{storage_acct_name}.blob.core.windows.net",
    credential=storage_acct_access_key,
)