Python TaskContext.get примеры использования

Язык программирования: Python

Пространство имен/Пакет: pyspark.taskcontext

Класс/Тип: TaskContext

Метод/Функция: get

Примеров на hotexamples.com: 7

Python TaskContext.get — найдено 7 примеров. Это лучшие примеры кода на Python для pyspark.taskcontext.TaskContext.get, полученные из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

get(7)

_getOrCreate(4)

TaskContext(1)

_setTaskContext(1)

partitionId(1)

Пример #1

Показать файл

            def func_with_open_process_close(partition_id: Any,
                                             iterator: Iterator) -> Iterator:
                """Drive the open/process/close lifecycle of ``f`` for one partition.

                The batch id is read from the ``streaming.sql.batchId`` local
                property of the current task context. When ``open_exists`` is
                set, ``f.open`` decides whether the rows are processed; each
                processed row goes to ``f.process``. When ``close_exists`` is
                set, ``f.close`` is called with the exception caught during
                processing, or ``None`` if there was none.

                Raises:
                    RuntimeError: if the task context has no batch id.
                    Exception: any error caught while processing is re-raised
                        after ``f.close`` has been called.

                Returns:
                    An empty iterator.
                """
                task_ctx = cast(TaskContext, TaskContext.get())
                batch_id = task_ctx.getLocalProperty("streaming.sql.batchId")
                if not batch_id:
                    raise RuntimeError(
                        "Could not get batch id from TaskContext")
                int_epoch_id = int(batch_id)

                # Ask the writer whether this partition/epoch needs processing.
                if open_exists:
                    should_process = f.open(  # type: ignore[union-attr]
                        partition_id, int_epoch_id)
                else:
                    should_process = True

                failure = None

                try:
                    if should_process:
                        for row in iterator:
                            cast("SupportsProcess", f).process(row)
                except Exception as caught:
                    # Remember the failure so close() can see it.
                    failure = caught
                finally:
                    if close_exists:
                        f.close(failure)  # type: ignore[union-attr]
                    if failure:
                        raise failure

                return iter([])

Пример #2

Показать файл

Файл: PY0demo.py Проект: WeichenXu123/spark-mpi-demo

    def ff(iter):
        """Dump this partition to ``partDataPath`` and run the MPI job on partition 0.

        Every task writes its rows, one ``str(row)`` per line, to
        ``partDataPath``. Only the task whose partition id is 0 then writes
        ``hosts`` to ``hostFilePath``, launches the MPI command through the
        shell and yields the command's captured stdout; all other tasks
        yield nothing.

        Raises:
            Exception: if the MPI command exits with a non-zero return code;
                the message carries the command, its stdout and its stderr.
        """
        partitionId = TaskContext.get().partitionId()

        with open(partDataPath, "w") as fp:
            for i in iter:
                fp.write(str(i) + "\n")
        # we need barrier here
        # sleep 1s for now to ensure all nodes datafile generated.
        time.sleep(1)

        if partitionId == 0:
            with open(hostFilePath, "w") as fp:
                fp.write(hosts)
            # NOTE: MPI require every node process run in the same working directory,
            # so I add `cd /tmp/` so every process will run in `/tmp`
            # without this, the default directory may not exist on other nodes and cause error.
            mpiCmd = ("cd /tmp/;" + mpiRunPath + " -n " + str(numTasks)
                      + " -f " + hostFilePath + " " + mpiProgPath + " "
                      + partDataPath + " " + outputDataPath)
            prc = Popen(mpiCmd, stdout=PIPE, stderr=PIPE, shell=True)
            stdout, stderr = prc.communicate()
            if prc.returncode != 0:
                # communicate() returns bytes on Python 3; decode so the
                # output can be concatenated into the str message.
                outText, errText = (
                    s.decode("utf-8", "replace") if isinstance(s, bytes) else s
                    for s in (stdout, stderr))
                raise Exception(
                    "cmd:\n" + mpiCmd + "\ncmd ouput:\n" + outText
                    + "\ncmd err\n: " + errText)
            # I still read data from stdout,
            # later I will change to read from local file
            # but we need to address the issue that ensure mapping process-0 to the mpirun node.
            yield stdout

Пример #3

Показать файл

Файл: eggroll_util.py Проект: pangzx1/FATE1.1

def maybe_create_eggroll_client():
    """
    Set up the eggroll client that spark tasks may use (a workaround).

    Reads the ``_EGGROLL_CLIENT`` local property of the current task context,
    hex-decodes and unpickles it into ``(mode, eggroll_session)``, and hands
    both to ``build_eggroll_runtime``.
    WARN: This may be removed or adjusted in future!
    """
    import pickle
    from pyspark.taskcontext import TaskContext

    hex_payload = TaskContext.get().getLocalProperty(_EGGROLL_CLIENT)
    # NOTE(review): pickle.loads trusts the property content; presumably it
    # is only ever set by the driver -- confirm.
    payload = pickle.loads(bytes.fromhex(hex_payload))
    mode, eggroll_session = payload
    build_eggroll_runtime(WorkMode(mode), eggroll_session)

Пример #4

Показать файл

def launchHorovodMPI(featureArrayFile, labelsFile):
    """Launch the Horovod MNIST training through ``mpirun`` from partition 0.

    Only the task whose partition id is 0 runs the command; every other task
    falls through and returns ``None``.

    Args:
        featureArrayFile: currently unused; to be passed to the MPI command later.
        labelsFile: currently unused; to be passed to the MPI command later.

    Returns:
        The captured stdout of the MPI command on partition 0, otherwise ``None``.

    Raises:
        Exception: if the MPI command exits with a non-zero return code;
            the message carries the command, its stdout and its stderr.
    """
    # later I will pass the two filepath args to the mpi cmd
    partitionId = TaskContext.get().partitionId()
    if partitionId == 0:
        # NOTE: MPI require every node process run in the same working directory,
        # so I add `cd /tmp/` so every process will run in `/tmp`
        # without this, the default directory may not exist on other nodes and cause error.
        mpiCmd = "cd /tmp/;mpirun -np 4 -H localhost:4 -bind-to none -map-by slot python hvd_run_mnist_training"
        prc = Popen(mpiCmd, stdout=PIPE, stderr=PIPE, shell=True)
        stdout, stderr = prc.communicate()
        if prc.returncode != 0:
            # communicate() returns bytes on Python 3; decode so the
            # output can be concatenated into the str message.
            outText, errText = (
                s.decode("utf-8", "replace") if isinstance(s, bytes) else s
                for s in (stdout, stderr))
            raise Exception(
                "cmd:\n" + mpiCmd + "\ncmd ouput:\n" + outText
                + "\ncmd err\n: " + errText)
        # I still read data from stdout,
        # later I will change to read from local file
        # but we need to address the issue that ensure mapping process-0 to the mpirun node.
        return stdout

Пример #5

Показать файл

Файл: eggroll_util.py Проект: xingxingt/eggroll

def maybe_create_eggroll_client():
    """
    Set up the eggroll client that spark tasks may use (a workaround).

    Reads the ``_EGGROLL_CLIENT`` local property of the current task context,
    hex-decodes and unpickles it into ``(mode, eggroll_session)``. For mode 1
    a cluster ``_EggRoll`` runtime is created and registered on the session,
    unless ``_EggRoll.instance`` is already set; for any other mode a
    ``Standalone`` is constructed from the session.
    WARN: This may be removed or adjusted in future!
    """
    import pickle
    from pyspark.taskcontext import TaskContext

    hex_payload = TaskContext.get().getLocalProperty(_EGGROLL_CLIENT)
    # NOTE(review): pickle.loads trusts the property content; presumably it
    # is only ever set by the driver -- confirm.
    mode, eggroll_session = pickle.loads(bytes.fromhex(hex_payload))

    if mode != 1:
        from eggroll.api.standalone.eggroll import Standalone
        Standalone(eggroll_session)
        return

    from eggroll.api.cluster.eggroll import _EggRoll
    if _EggRoll.instance is not None:
        # A cluster runtime already exists; nothing to do.
        return

    from eggroll.api import ComputingEngine
    cluster_runtime = _EggRoll(eggroll_session=eggroll_session)
    eggroll_session.set_runtime(ComputingEngine.EGGROLL_DTABLE,
                                cluster_runtime)

Пример #6

Показать файл

        def process_event(event, hint):
            # type: (Event, Hint) -> Optional[Event]
            """Tag ``event`` with details of the current Spark task.

            When both the ``SparkWorkerIntegration`` and a task context are
            available, the stage id, partition id, attempt number and task
            attempt id are added as tags. Values already present on the event
            are kept. If the task's local properties contain
            ``sentry_app_name``, the app name and application id are tagged as
            well; if they contain ``callSite.short``, it is stored under the
            event's ``extra`` data.

            Returns:
                The same ``event`` object, possibly enriched.
            """
            # NOTE(review): presumably capture_internal_exceptions() swallows
            # errors raised inside the block -- confirm.
            with capture_internal_exceptions():
                spark_integration = Hub.current.get_integration(
                    SparkWorkerIntegration)
                ctx = TaskContext.get()

                if spark_integration is None or ctx is None:
                    return event

                tags = event.setdefault("tags", {})
                # Each tag is named after the TaskContext method supplying it.
                for tag_name in ("stageId", "partitionId",
                                 "attemptNumber", "taskAttemptId"):
                    tags.setdefault(tag_name, getattr(ctx, tag_name)())

                local_props = ctx._localProperties
                if local_props:
                    if "sentry_app_name" in local_props:
                        tags.setdefault("app_name",
                                        local_props["sentry_app_name"])
                        tags.setdefault(
                            "application_id",
                            local_props["sentry_application_id"],
                        )

                    if "callSite.short" in local_props:
                        extra = event.setdefault("extra", {})
                        extra.setdefault("callSite",
                                         local_props["callSite.short"])

            return event

Пример #7

Показать файл

def _runShellCmd(cmd):
    """Run ``cmd`` through the shell and return its captured stdout.

    Raises:
        Exception: if the command exits with a non-zero return code; the
            message carries the command, its stdout and its stderr.
    """
    prc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
    stdout, stderr = prc.communicate()
    if prc.returncode != 0:
        # communicate() returns bytes on Python 3; decode so the output can
        # be concatenated into the str message.
        outText, errText = (
            s.decode("utf-8", "replace") if isinstance(s, bytes) else s
            for s in (stdout, stderr))
        raise Exception(
            "cmd:\n" + cmd + "\ncmd ouput:\n" + outText
            + "\ncmd err\n: " + errText)
    return stdout


def runHorovodMPI(iter):
    """Save this task's data, then train via ``mpirun`` on partition 0.

    Every task writes the pandas frames from ``iter`` to a fixed local file
    as pyarrow tables and waits on a barrier. The task with partition id 0
    then launches the MPI training program across all hosts, copies the
    exported model to a fresh ``/tmp/model_*`` directory and returns that
    path. All tasks wait on a second barrier before returning.

    Args:
        iter: iterable of pandas frames for this partition (assumed to hold
            a single element, since the output file name is fixed).

    Returns:
        ``[destModelDir]`` on partition 0, ``[]`` on every other partition.

    Raises:
        Exception: if the MPI command or the model copy command exits with a
            non-zero return code.
    """
    taskCtx = TaskContext.get()
    # assume only one element in the iterator.
    # so I fix the file name for now
    dataFilePath = "/tmp/mpiInputData"
    modelExportDir = "/tmp/modelExportDir_" + str(random.randint(0, 2 << 30))

    # Note:
    # change this to be a dbfs path
    destModelDir = "/tmp/model_" + str(random.randint(0, 2 << 30))

    for pdf in iter:
        table = pa.Table.from_pandas(pdf)
        # later will directly get pyarrow table from RDD.
        save_pyarrow_table(table, dataFilePath)

    taskCtx.barrier()
    if taskCtx.partitionId() != 0:
        # Non-driving tasks only wait for partition 0 to finish.
        taskCtx.barrier()
        return []

    hostsList = [i.split(":")[0] for i in taskCtx.hosts()]
    localHost = hostsList[0]  # need a new API
    numProc = len(hostsList)

    # move local host to be first one.
    localIdx = hostsList.index(localHost)
    hostsList[0], hostsList[localIdx] = hostsList[localIdx], hostsList[0]

    # do not generate host file, use simpler -H param instead.
    hostsListParam = ",".join(hostsList)

    # generate rank file, one rank entry per line
    rankFilePath = "/tmp/rankfile"
    with open(rankFilePath, "w") as rf:
        for rank, host in enumerate(hostsList):
            rf.write("rank %d=%s slot=0-4\n" % (rank, host))

    # NOTE:
    # Remember to change to real path
    mpiProgPath = "/tmp/hvd_run_mnist_training.py"

    # NOTE: specify mpi working dir "/tmp".
    # and note the horovod estimator will generate checkpoint dir
    # `mnist_convnet_model_${RANDOM_NUMBER}`
    # in the working dir.

    # NOTE:
    # Remember to add `sudo -u ubuntu` when run on databricks cluster
    # and change python path
    mpiCmd = "mpirun --wdir %s -np %d -H %s python %s %s %s" % (
        "/tmp",
        numProc,
        hostsListParam,
        # rankFilePath is generated above but not passed yet
        mpiProgPath,
        dataFilePath,
        modelExportDir)
    _runShellCmd(mpiCmd)

    # get the inner dir; join with a separator so the path is a child of
    # the export dir rather than a sibling with a fused name.
    modelDir = os.path.join(modelExportDir, os.listdir(modelExportDir)[0])

    copyModelCmd = "cp -r %s %s" % (modelDir, destModelDir)
    # A failure here is reported with the copy command, not the MPI command.
    _runShellCmd(copyModelCmd)

    taskCtx.barrier()

    return [destModelDir]