def save_offsets(rdd):
    # Persist the latest Kafka offsets to ZooKeeper so the stream can resume
    # from them after a restart (exactly-once semantics).
    print("Saving offset | Exactly Once Semantics")
    zk = PipelineUtils.getZookeeperInstance()
    for offset in rdd.offsetRanges():
        path = f"/consumers/{offset.topic}/{offset.partition}"
        zk.ensure_path(path)
        zk.set(path, str(offset.untilOffset).encode())
def voidFlatObs(encounter_ids):
    # Delete previously sunk flat_obs_orders rows for the given encounters so that
    # reprocessed encounters do not leave stale records behind.
    try:
        db = PipelineUtils.getConfig()['storage']['db']
        encounter_ids = ','.join(map(str, encounter_ids))
        if db == "delta":
            deltaTable = DeltaUtils.getDeltaTable("flat_obs_orders")
            deltaTable.delete("encounter_id IN ({0})".format(encounter_ids))
        elif db == "cassandra":
            CassandraUtils.deleteFromCassandra("flat_obs_orders", encounter_ids)
    except Exception as e:
        print("An unexpected error occurred while voiding FlatObs records", e)
        raise
def read_offsets(topics):
    # Restore the last committed offsets from ZooKeeper; returns a
    # {TopicAndPartition: offset} map suitable for createDirectStream's fromOffsets.
    try:
        zk = PipelineUtils.getZookeeperInstance()
        from_offsets = {}
        for topic in topics:
            for partition in zk.get_children(f'/consumers/{topic}'):
                topic_partition = TopicAndPartition(topic, int(partition))
                offset = int(zk.get(f'/consumers/{topic}/{partition}')[0])
                from_offsets[topic_partition] = offset
        print("Previous offsets -->", from_offsets)
        return from_offsets
    except Exception as e:
        print("An unexpected error occurred while reading offsets", e)
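# Illustrative sketch only (not from the pipeline source): one way the two offset
# helpers above could be wired into a direct Kafka stream for exactly-once processing.
# The Kafka parameters, the processing body, and the function name are assumptions.
from pyspark.streaming.kafka import KafkaUtils

def startExactlyOnceStream(ssc, topics, kafka_params):
    # Resume from the offsets previously committed to ZooKeeper, if any
    from_offsets = read_offsets(topics)
    stream = KafkaUtils.createDirectStream(ssc, topics, kafka_params,
                                           fromOffsets=from_offsets or {})

    def process(rdd):
        if not rdd.isEmpty():
            # ... transform and sink the micro-batch here ...
            save_offsets(rdd)  # commit offsets only after the batch succeeded

    stream.foreachRDD(process)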
def sinkFlatObs(microbatch, batchId):
    try:
        db = PipelineUtils.getConfig()['storage']['db']
        if db == "delta":
            patient_id = microbatch.select("patient_id").rdd.flatMap(lambda x: x).collect()
            whereClause = "table.patient_id IN ({0}) AND table.encounter_id = updates.encounter_id"\
                .format(','.join(map(str, patient_id)))
            print(whereClause)
            DeltaUtils.upsertMicroBatchToDelta("flat_obs_orders",  # delta tablename
                                               microbatch,         # microbatch
                                               whereClause)        # where clause condition
        elif db == "cassandra":
            CassandraUtils.sinkToCassandra(microbatch, "flat_obs_orders", mode="append")
    except Exception as e:
        print("An unexpected error occurred while sinking FlatObs microbatch", e)
        raise
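# Usage sketch (assumption, not from the source): sinkFlatObs has the (DataFrame, batchId)
# signature that Structured Streaming's foreachBatch expects, so a streaming flat_obs query
# could be attached like this. 'flatObsStreamDF' and the checkpoint path are hypothetical.
query = flatObsStreamDF.writeStream \
    .foreachBatch(sinkFlatObs) \
    .option("checkpointLocation", "/tmp/checkpoints/flat_obs_orders") \
    .start()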
from common.utils import PipelineUtils

PipelineUtils.getSpark()
from delta.tables import *  # ignore pylint error


class DeltaUtils:

    @staticmethod
    def getDeltaTable(table):
        deltaConfig = PipelineUtils.getConfig()['storage']
        path = deltaConfig['tables'][table]["path"]
        spark = PipelineUtils.getSpark()
        return DeltaTable.forPath(spark, path)

    # static method for merging incremental updates into Delta tables
    @staticmethod
    def upsertMicroBatchToDelta(tableName, microBatchOutputDF,
                                whereClause="table.id = updates.id"):
        deltaTable = DeltaUtils.getDeltaTable(tableName)
        return deltaTable.alias("table").merge(microBatchOutputDF.alias("updates"), whereClause)\
            .whenMatchedUpdateAll()\
            .whenNotMatchedInsertAll()\
            .execute()
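# Minimal usage sketch with hypothetical column names and values: merge a small batch of
# updates into the configured "flat_obs_orders" Delta table, matching rows on encounter_id.
spark = PipelineUtils.getSpark()
updates = spark.createDataFrame(
    [(101, 7, "WEIGHT", 72.5)],
    ["encounter_id", "patient_id", "concept", "value"])
DeltaUtils.upsertMicroBatchToDelta(
    "flat_obs_orders",
    updates,
    whereClause="table.encounter_id = updates.encounter_id")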
def sourceFromCassandra(table):
    return PipelineUtils.getSpark().read\
        .format("org.apache.spark.sql.cassandra")\
        .options(table=table, keyspace="elt")\
        .load()
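# Usage sketch (assumption): pull the flat_obs_orders table back out of Cassandra,
# e.g. for a backfill or reconciliation job; the patient_id filter is illustrative.
obs_df = sourceFromCassandra("flat_obs_orders")
obs_df.filter(obs_df.patient_id == 7).show()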