示例#1
0
from common.utils import PipelineUtils
# NOTE(review): presumably forces eager creation of the Spark session before the
# delta import below runs — confirm this ordering is actually required.
PipelineUtils.getSpark()
from delta.tables import * # wildcard needed for DeltaTable; pylint warning intentionally ignored

class DeltaUtils:
    """Helpers for reading and upserting Delta Lake tables configured by the pipeline."""

    @staticmethod
    def getDeltaTable(table):
        """Return a DeltaTable handle for *table*, resolving its path from pipeline config."""
        storage_conf = PipelineUtils.getConfig()['storage']
        table_path = storage_conf['tables'][table]["path"]
        session = PipelineUtils.getSpark()
        return DeltaTable.forPath(session, table_path)

    # Merge incremental (micro-batch) updates into the target Delta table.
    @staticmethod
    def upsertMicroBatchToDelta(tableName, microBatchOutputDF, whereClause="table.id = updates.id"):
        """Upsert *microBatchOutputDF* into the Delta table named *tableName*.

        Matched rows (per *whereClause*) are fully updated; unmatched rows are
        inserted. Returns the result of the merge execution.
        """
        target = DeltaUtils.getDeltaTable(tableName)
        merge_builder = target.alias("table").merge(
            microBatchOutputDF.alias("updates"), whereClause
        )
        merge_builder = merge_builder.whenMatchedUpdateAll()
        merge_builder = merge_builder.whenNotMatchedInsertAll()
        return merge_builder.execute()
示例#2
0
 def getDeltaTable(table):
     """Look up *table*'s storage path in the pipeline config and open it as a DeltaTable."""
     storage = PipelineUtils.getConfig()['storage']
     spark_session = PipelineUtils.getSpark()
     delta_path = storage['tables'][table]["path"]
     return DeltaTable.forPath(spark_session, delta_path)
示例#3
0
 def sourceFromCassandra(table):
     """Load the Cassandra table *table* (keyspace "elt") into a Spark DataFrame."""
     reader = PipelineUtils.getSpark().read
     reader = reader.format("org.apache.spark.sql.cassandra")
     reader = reader.options(table=table, keyspace="elt")
     return reader.load()