Example #1
import os
import class_pyspark
from typing import Optional
from pyspark.sql import SparkSession

def importData(spark:SparkSession, datapath:str, pattern:Optional[str]=None) -> list:
    """ get data from directories or files """
    if isinstance(datapath, str) and os.path.exists(datapath):
        return class_pyspark.Sparkclass(config={}).importData(spark, datapath, pattern)
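A possible call, sketched under assumptions: the path is a placeholder and pattern is taken to be a filename filter passed through to Sparkclass.importData.

data = importData(spark, "data/input", pattern=".json")   # hypothetical path and pattern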
Example #2
def showMySchema(df:DataFrame, filename:str) -> None:
    """ saves a file of the schema  """
    if isinstance(df, DataFrame):
        class_pyspark.Sparkclass(config={}).debugDf(df, filename)
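A sketch of a call, assuming df is a DataFrame produced earlier in the pipeline and the filename is a placeholder:

showMySchema(df, "customers")   # hypothetical filename passed to Sparkclass.debugDf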
Example #3
def sparkStart(conf:dict) -> SparkSession:
    """ start a spark session """
    if isinstance(conf, dict):
        return class_pyspark.Sparkclass(config={}).sparkStart(conf)
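A minimal sketch, assuming a config dict shaped like the JSON file that openConfig (Example #4 below) returns; the keys shown are hypothetical:

conf = {"spark_conf": {"spark.app.name": "demo"}}   # hypothetical config shape
spark = sparkStart(conf)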
Example #4
def openConfig(filepath:str) -> dict:
    """ opens the json configuration file  """
    if isinstance(filepath, str) and os.path.exists(filepath):
        return class_pyspark.Sparkclass(config={}).openJson(filepath)
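A sketch tying this to sparkStart above; the file path is a placeholder:

conf = openConfig("config/setup.json")   # hypothetical path
spark = sparkStart(conf)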
Example #5
def loadDeltaTables(listOfPaths:list) -> list:
    """ load data from delta tables for various reasons like historic or additional data sources 
        you could also include this function in our main function above rather than from our transformData function
    """
    return [class_pyspark.Sparkclass(config={}).loadTables(x[0], x[1], x[2]) for x in listOfPaths]
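A hedged usage sketch: the snippet does not say what the three tuple elements are, so the (path, table name, format) layout below is only an assumption:

tables = loadDeltaTables([
    ("spark-warehouse/sales", "sales_hist", "delta"),   # hypothetical tuple layout
])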
Example #6
def exportResult(listOfDf:list) -> None:
    """ input is a list of tuples (dataframe, "tablename"), write to various file formats including delta lake tables """
    for x in listOfDf:
        class_pyspark.Sparkclass(config={}).exportDf(x)
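A sketch of a call, assuming df is a transformed DataFrame and the table name is a placeholder:

exportResult([(df, "sales_clean")])   # hypothetical (dataframe, tablename) pair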
Example #7
def createTempTables(spark:SparkSession, listOfDf:list) -> None:
    """ input is a list of tuples (dataframe, "tablename"), create temporary SQL tables in memory"""
    for x in listOfDf:
        class_pyspark.Sparkclass(config={}).createTempTables(x)
    for table in spark.catalog.listTables():
        class_pyspark.Sparkclass(config={}).debugTables(table)
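A sketch of registering and then querying a temp table, assuming df is a DataFrame already loaded; the table name and query are hypothetical:

createTempTables(spark, [(df, "sales")])          # hypothetical table name
spark.sql("SELECT COUNT(*) FROM sales").show()    # query the in-memory table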