Example #1
import os
from typing import Optional
from pyspark.sql import SparkSession
import class_pyspark

def importData(spark:SparkSession, datapath:str, pattern:Optional[str]=None) -> list:
    """ get data from directories or files """
    if isinstance(datapath, str) and os.path.exists(datapath):
        return class_pyspark.Sparkclass(config={}).importData(spark, datapath, pattern)
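A minimal usage sketch, assuming a live SparkSession (see sparkStart in Example #3) and a placeholder data directory; the pattern argument is forwarded to Sparkclass.importData as-is:

listOfDf = importData(spark, "./data", pattern=None)   # "./data" is a placeholder path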
Example #2
import class_pyspark
from pyspark.sql import DataFrame

def showMySchema(df:DataFrame, filename:str) -> None:
    """ saves the dataframe schema to a file """
    if isinstance(df, DataFrame):
        class_pyspark.Sparkclass(config={}).debugDf(df, filename)
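A hedged usage sketch; how Sparkclass.debugDf maps the filename to an output location is not shown here, so the name is a placeholder:

showMySchema(df, "myDataframe")   # df is any DataFrame; "myDataframe" is a placeholder name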
Example #3
import class_pyspark
from pyspark.sql import SparkSession

def sparkStart(conf:dict) -> SparkSession:
    """ start a spark session """
    if isinstance(conf, dict):
        return class_pyspark.Sparkclass(config={}).sparkStart(conf)
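A sketch with an inline config dict; the keys that Sparkclass.sparkStart actually expects live in the project's JSON config and are only guessed at here:

spark = sparkStart({"appname": "demo", "master": "local[*]"})   # hypothetical keys, for illustration only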
Example #4
import os
import class_pyspark

def openConfig(filepath:str) -> dict:
    """ opens the json configuration file """
    if isinstance(filepath, str) and os.path.exists(filepath):
        return class_pyspark.Sparkclass(config={}).openJson(filepath)
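Chained with Example #3, this gives the usual startup sequence; the config path is a placeholder:

conf = openConfig("./config/etl.json")   # placeholder path to the JSON config
spark = sparkStart(conf)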
Example #5
import class_pyspark

def loadDeltaTables(listOfPaths:list) -> list:
    """ load data from delta tables, e.g. for historic or additional data sources;
        this could also be called from the main function above rather than from transformData
    """
    return [class_pyspark.Sparkclass(config={}).loadTables(x[0], x[1], x[2]) for x in listOfPaths]
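The x[0], x[1], x[2] indexing implies each entry is a 3-tuple; its exact meaning depends on Sparkclass.loadTables, which is not shown, so the element order below (session, path, table name) is an assumption:

tables = loadDeltaTables([(spark, "/tmp/delta/sales", "sales_history")])   # hypothetical tuple layout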
Example #6
import class_pyspark

def exportResult(listOfDf:list) -> None:
    """ input is a list of tuples (dataframe, "tablename"); write to various file formats including delta lake tables """
    for x in listOfDf:
        class_pyspark.Sparkclass(config={}).exportDf(x)
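A sketch matching the (dataframe, "tablename") shape described in the docstring; the dataframes and names are placeholders:

exportResult([(salesDf, "sales"), (eventsDf, "events")])   # placeholder dataframes and table names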
Example #7
import class_pyspark
from pyspark.sql import SparkSession

def createTempTables(spark:SparkSession, listOfDf:list) -> None:
    """ input is a list of tuples (dataframe, "tablename"); create temporary SQL tables in memory """
    for x in listOfDf:
        class_pyspark.Sparkclass(config={}).createTempTables(x)
    for table in spark.catalog.listTables():
        class_pyspark.Sparkclass(config={}).debugTables(table)
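Once registered, the temp tables can be queried through standard Spark SQL; the names below are placeholders:

createTempTables(spark, [(salesDf, "sales")])    # placeholder dataframe and table name
spark.sql("SELECT COUNT(*) FROM sales").show()   # query the in-memory temp table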