def run_indri(args, output, overwrite_threads=False):
    # Variable, get_client and get_worker come from dask.distributed;
    # get_loadinfo is a project-level helper.
    from subprocess import Popen, PIPE
    import os
    import time

    cancel = Variable('cancel', get_client())
    if cancel.get():
        return ('canceled', get_worker().address, 0, get_loadinfo())

    start = time.time()
    if overwrite_threads:
        # Leave one core free for the worker itself.
        processes = len(os.sched_getaffinity(0)) - 1
        args = (args[0], '-threads={}'.format(processes), *args[1:])

    with Popen(args, stdout=PIPE, stderr=PIPE) as proc:
        content = []
        for l in proc.stdout:
            content.append(l)
            # Poll the distributed cancel flag only every 1000 lines to
            # keep the overhead of Variable.get() low.
            if len(content) % 1000 != 0:
                continue
            if cancel.get():
                proc.kill()
                return ('killed', get_worker().address,
                        time.time() - start, get_loadinfo())

    with open(output, 'wb') as f:
        f.writelines(content)
    return ('completed', get_worker().address,
            time.time() - start, get_loadinfo())

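# Usage sketch (assumption, not from the source): driving run_indri from the
# client with a shared cancel flag. The scheduler address, Indri binary name
# and file paths are hypothetical placeholders.
from dask.distributed import Client, Variable

client = Client('tcp://scheduler:8786')
Variable('cancel', client).set(False)
future = client.submit(run_indri, ('IndriRunQuery', 'queries.xml'),
                       'run.out', overwrite_threads=True)
status, worker_addr, elapsed, load = future.result()
Variable('cancel', client).set(True)  # ask any still-running copies to stop
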
def _func_init_nccl(sessionId, uniqueId):
    """
    Initialize ncclComm_t on worker

    Parameters
    ----------
    sessionId : str
        session identifier from a comms instance
    uniqueId : array[byte]
        The NCCL unique Id generated from the client.
    """
    worker = get_worker()
    raft_comm_state = get_raft_comm_state(
        sessionId=sessionId, state_object=worker
    )
    wid = raft_comm_state["wid"]
    nWorkers = raft_comm_state["nworkers"]

    try:
        n = nccl()
        n.init(nWorkers, uniqueId, wid)
        raft_comm_state["nccl"] = n
    except Exception as e:
        worker.log_event(
            topic="error", msg=f"An error occurred initializing NCCL: {e}."
        )
        raise

def get_endpoints(addr_ports):
    # Create endpoints to all other workers.
    worker = get_worker()
    ucx = worker._ucx
    for address, port in addr_ports:
        if address != worker.address:
            # The host comes from the peer's Dask address; the UCX
            # listener port was reported separately by build_ucx().
            host, _ = parse_host_port(address)
            ucx.get_endpoint(host, port)

def build_ucx():
    # Create listener and cache on worker
    worker = get_worker()
    worker._callback_invoked = False

    def mock_callback(ep):
        get_worker()._callback_invoked = True

    ucx = UCX.get(mock_callback)
    worker._ucx = ucx
    return worker.address, ucx.listener_port()

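# Usage sketch (assumption, not from the source): wiring the two helpers
# together from the client. Client.run(build_ucx) returns a dict mapping
# each worker to its (address, listener_port) pair; passing those pairs to
# get_endpoints then connects every worker to all of its peers.
from dask.distributed import Client

client = Client('tcp://scheduler:8786')  # hypothetical scheduler address
addr_ports = list(client.run(build_ucx).values())
client.run(get_endpoints, addr_ports)
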
def xgboost_postprocess(pkl_path):
    with open(pkl_path, 'rb') as rf:
        obj = pickle.load(rf)

    try:
        worker = get_worker()
        # Look up the worker plugin whose registered name contains 'process'.
        dp = None
        for plg in worker.plugins:
            if 'process' in plg:
                dp = worker.plugins[plg]
                break
        if dp is None:
            raise ValueError('No process plugin registered')
    except Exception as e:
        logger.error(str(e), exc_info=True)
        raise e

    objects = obj['content']
    objects = postprocess(dp.postprocess_model, dp.classes, objects)
    # remove empty strings returned from postprocess
    objects = [i for i in objects if i != '']
    obj['xgboost_content'] = objects

    with open(pkl_path, 'wb') as wf:
        pickle.dump(obj, wf)
    return pkl_path

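# Registration sketch (assumption, not from the source): the lookup above
# matches any worker plugin whose name contains 'process'. A plugin exposing
# the postprocess_model and classes attributes the function reads might be
# registered like this; the class name and constructor values are
# hypothetical placeholders.
from dask.distributed import Client, WorkerPlugin

class ProcessPlugin(WorkerPlugin):
    def __init__(self, model, classes):
        self.postprocess_model = model
        self.classes = classes

client = Client('tcp://scheduler:8786')  # hypothetical
client.register_worker_plugin(ProcessPlugin(model=None, classes=()),
                              name='xgboost-process')
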
def _wrapped_function(function, *args, **kwargs):
    available_resources = kwargs['available_resources']
    per_worker_logging = kwargs.pop('per_worker_logging')
    gpu_assignments = kwargs.pop('gpu_assignments')

    # Set up the logging per worker if the flag is set to True.
    if per_worker_logging:
        # Each worker should have its own log file.
        kwargs['logger_path'] = '{}.log'.format(get_worker().id)

    if available_resources.number_of_gpus > 0:
        worker_id = distributed.get_worker().id
        available_resources._gpu_device_indices = (
            '0' if worker_id not in gpu_assignments
            else gpu_assignments[worker_id]
        )
        logging.info(
            f'Launching a job with access to GPUs '
            f'{available_resources._gpu_device_indices}'
        )

    return_value = _Multiprocessor.run(function, *args, **kwargs)
    return return_value

def _func_set_worker_as_nccl_root(sessionId, verbose):
    """
    Creates a persistent nccl uniqueId on the worker node.

    Parameters
    ----------
    sessionId : str
        Associated session to attach the unique ID to.
    verbose : bool
        Indicates whether or not to emit additional information

    Returns
    -------
    uniqueId : byte str
        NCCL uniqueId, associating this DASK worker as its root node.
    """
    worker = get_worker()
    if verbose:
        worker.log_event(
            topic="info",
            msg=f"Setting worker as NCCL root for session, '{sessionId}'",
        )

    nccl_uid = set_nccl_root(sessionId=sessionId, state_object=worker)

    if verbose:
        worker.log_event(
            topic="info", msg="Done setting worker as NCCL root."
        )

    return nccl_uid

def _wrapped_function(function, *args, **kwargs):
    available_resources = kwargs["available_resources"]
    per_worker_logging = kwargs.pop("per_worker_logging")
    gpu_assignments = kwargs.pop("gpu_assignments")

    # Set up the logging per worker if the flag is set to True.
    if per_worker_logging:
        # Each worker should have its own log file.
        os.makedirs("worker-logs", exist_ok=True)
        kwargs["logger_path"] = os.path.join(
            "worker-logs", f"{get_worker().id}.log"
        )

    if available_resources.number_of_gpus > 0:
        worker_id = distributed.get_worker().id
        available_resources._gpu_device_indices = (
            "0" if worker_id not in gpu_assignments
            else gpu_assignments[worker_id]
        )
        logger.info(f"Launching a job with access to GPUs "
                    f"{available_resources._gpu_device_indices}")

    return_value = _Multiprocessor.run(function, *args, **kwargs)
    return return_value

def evalOneMax(individual, lmbd=3):
    edge = '18_1'
    start_time = 57600
    end_time = 86400
    try:
        # Earlier revisions derived the rank from multiprocessing/scoop
        # worker identities or a random integer:
        # rank = mp.current_process()._identity[0]
        # rank = scoop.worker.decode("utf-8").replace(".", "").replace(":", "")
        # rank = random.randint(0, 100)
        rank = get_worker().id
    except ValueError:
        # get_worker() raises ValueError when not running on a dask worker.
        rank = 0
    return run_sim(lmbd, edge, start_time, end_time, rank, individual)

def dask_to_fst(*args, **kwargs):
    import sys
    import logging
    from dask.distributed import get_worker

    # logger name is the same for all workers
    logger = logging.getLogger(logging_basename)

    # add handlers if none are present for this worker
    if not len(logger.handlers):
        command_line_args = args[2]
        logger.setLevel(command_line_args.log_level)
        logging.captureWarnings(True)

        # handlers
        worker_id = str(get_worker().id).lower()
        stream_handler = logging.StreamHandler(sys.stdout)
        file_handler = logging.FileHandler('logs/' + worker_id, 'w')

        # levels
        stream_handler.setLevel(command_line_args.log_level)
        file_handler.setLevel(command_line_args.log_level)

        # format
        formatter_stream = logging.Formatter(worker_id + ' %(message)s')
        stream_handler.setFormatter(formatter_stream)
        formatter_file = logging.Formatter(
            '%(asctime)s - %(name)s in %(funcName)s - %(levelname)s - %(message)s'
        )
        file_handler.setFormatter(formatter_file)

        # add handlers
        logger.addHandler(stream_handler)
        logger.addHandler(file_handler)

    return to_fst(*args, **kwargs)

def _train_part(params, model_factory, list_of_parts, worker_address_to_port,
                return_model, time_out=120, **kwargs):
    local_worker_address = get_worker().address
    machine_list = ','.join([
        '%s:%d' % (urlparse(worker_address).hostname, port)
        for worker_address, port in worker_address_to_port.items()
    ])
    network_params = {
        'machines': machine_list,
        'local_listen_port': worker_address_to_port[local_worker_address],
        'time_out': time_out,
        'num_machines': len(worker_address_to_port)
    }
    params.update(network_params)

    # Concatenate many parts into one
    parts = tuple(zip(*list_of_parts))
    data = _concat(parts[0])
    label = _concat(parts[1])
    weight = _concat(parts[2]) if len(parts) == 3 else None

    try:
        model = model_factory(**params)
        model.fit(data, label, sample_weight=weight, **kwargs)
    finally:
        _safe_call(_LIB.LGBM_NetworkFree())

    return model if return_model else None

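# Dispatch sketch (assumption, not from the source): dask-lightgbm-style
# training launches one _train_part task per worker, pinned with workers=.
# `client`, `params`, `worker_address_to_port`, and `parts_by_worker`
# (mapping worker address -> its local data parts) are assumed to have
# been collected beforehand.
import lightgbm

futures = [
    client.submit(
        _train_part, params, lightgbm.LGBMClassifier, parts,
        worker_address_to_port, return_model=(i == 0),
        workers=[addr], allow_other_workers=False,
    )
    for i, (addr, parts) in enumerate(parts_by_worker.items())
]
model = next(m for m in client.gather(futures) if m is not None)
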
def __setstate__(self, state):
    # When we are running on a dask worker, functions are executed in a
    # different thread from the worker itself, even if there is only one
    # thread. To prevent problems with SQLite, we check if this is a
    # worker and if there is only one thread, in which case we can safely
    # ignore the fact that the database is accessed from a different
    # thread than where it is created.
    from dask.distributed import get_worker
    try:
        worker = get_worker()
    except ValueError:
        n_threads = -1
    else:
        n_threads = worker.nthreads

    database_path = state.pop('_sqlitedb_path_', None)
    database_readonly = state.pop('_sqlitedb_readonly_', False)
    self.__dict__ = state
    if database_path and not database_readonly:
        from ...database import SQLiteDB
        if os.path.exists(database_path):
            self.db = SQLiteDB(
                database_path,
                initialize='skip',
                readonly=database_readonly,
                check_same_thread=(n_threads != 1),
            )

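# Side note (illustration, not from the source): the try/except probe above
# is the standard way to detect whether code is running on a dask worker;
# get_worker() raises ValueError anywhere else.
from dask.distributed import get_worker

def on_dask_worker():
    try:
        get_worker()
        return True
    except ValueError:
        return False
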
def construct_linked_kb(eids):
    try:
        worker = get_worker()
        dp = None
        for plg in worker.plugins:
            if 'Linking' in plg:
                dp = worker.plugins[plg]
                break
        if dp is None:
            raise Exception('No linking plugin registered')
        if eids is None:
            return None

        results = []
        for eid in eids:
            entity = dp.linker.kb.cui_to_entity[eid]
            result = {
                'id': entity.concept_id,
                'name': entity.canonical_name,
                'aliases': tuple(entity.aliases),
                'types': tuple(entity.types),
                'description': entity.definition,
            }
            results.append(result)

        result_df = pd.DataFrame(results)
        return result_df
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return None

def link(content, score_threshold=0.8):
    try:
        worker = get_worker()
        dp = None
        for plg in worker.plugins:
            if 'Linking' in plg:
                dp = worker.plugins[plg]
                break
        if dp is None:
            raise Exception('No linking plugin registered')

        linking_result = dp.nlp(content)
        ent_set = set()
        nonlinked_list = set()

        # We'll only add one copy of the entity mention per paragraph.
        for ent in linking_result.ents:
            linked = False
            for ent_id, score in ent._.kb_ents:
                if score > score_threshold:
                    linked = True
                    if ent_id in ent_set:
                        continue
                    ent_set.add(ent_id)
                    break
            if not linked:
                nonlinked_list.add(ent.text)

        ent_set = list(ent_set)
        nonlinked_list = list(nonlinked_list)
        return nonlinked_list, ent_set
    except Exception as e:
        logger.error(str(e), exc_info=True)
        return (None, None)

def func_chunk(chunk_ds):
    worker = get_worker()
    # 'memory_limit': '2GB',
    worker.memory_target_fraction = 0.95
    worker.memory_spill_fraction = 0.95
    worker.memory_pause_fraction = 0.95
    worker.memory_terminate_fraction = 0.95  # False

    # print('chunk started:', chunk_ds.lat[0].data, chunk_ds.lon[0].data, flush=True)
    res_ds = nested_groupby_apply(chunk_ds, ['lat', 'lon', 'prob'], func_pwc_mr_fd)

    # groupby removes the dimensions mentioned, so the resulting ds is
    # lower dimensional; unfortunately, map_blocks does not do that, and so
    # putting the sub-result datasets back together becomes technically
    # difficult:
    # chunk_fake_ds = make_fake_ds(chunk_ds).chunk(sub_chunk_dict)
    # sub_chunk_ds = chunk_ds.chunk(sub_chunk_dict)
    # res_ds = xr.map_blocks(func_pwc_mr_fd, sub_chunk_ds, template=chunk_fake_ds)

    # print(
    #     'chunk finished:',
    #     chunk_ds.lat[0].data, chunk_ds.lon[0].data, chunk_ds.prob[0].data,
    #     flush=True
    # )
    # write_to_logfile(
    #     'chunk finished,',
    #     "lat:", chunk_ds.lat[0].data,
    #     "lon:", chunk_ds.lon[0].data,
    #     "prob:", chunk_ds.prob[0].data
    # )
    return res_ds

def fake_remote_fct(
    docker_auth: DockerBasicAuth,
    service_key: str,
    service_version: str,
    input_data: TaskInputData,
    output_data_keys: TaskOutputDataSchema,
    log_file_url: AnyUrl,
    command: List[str],
) -> TaskOutputData:
    # get the task data
    worker = get_worker()
    task = worker.tasks.get(worker.get_current_task())
    assert task is not None
    print(f"--> task {task=} started")
    cancel_event = Event(TaskCancelEventName.format(task.key))

    # tell the client we are started
    start_event = Event(_DASK_EVENT_NAME)
    start_event.set()

    # sleep a bit in case someone is aborting us
    print("--> waiting for task to be aborted...")
    cancel_event.wait(timeout=10)
    if cancel_event.is_set():
        # NOTE: asyncio.CancelledError is not propagated back to the client...
        print("--> raising cancellation error now")
        raise TaskCancelledError

    return TaskOutputData.parse_obj({"some_output_key": 123})

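# Client-side sketch of the cancel handshake above (assumption, not from the
# source): wait for the start event the task sets, then fire its per-task
# cancel event. `future` is assumed to be the future returned by
# client.submit(fake_remote_fct, ...).
from distributed import Event

Event(_DASK_EVENT_NAME).wait(timeout=30)
Event(TaskCancelEventName.format(future.key)).set()
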
def dask_mapper(current_range):
    """
    Gets the paths to the file(s) in the current executor, then
    declares the headers found.

    Args:
        current_range (tuple): The current range of the dataset being
            processed on the executor.

    Returns:
        function: The map function to be executed on each executor,
            complete with all headers needed for the analysis.
    """
    # Retrieve the current worker local directory
    localdir = get_worker().local_directory

    # Get and declare headers on each worker
    headers_on_executor = [
        os.path.join(localdir, os.path.basename(filepath))
        for filepath in headers
    ]
    Utils.declare_headers(headers_on_executor)

    # Get and declare shared libraries on each worker
    shared_libs_on_ex = [
        os.path.join(localdir, os.path.basename(filepath))
        for filepath in shared_libraries
    ]
    Utils.declare_shared_libraries(shared_libs_on_ex)

    return mapper(current_range)

async def _func_ucp_create_endpoints(sessionId, worker_info):
    """
    Runs on each worker to create ucp endpoints to all other workers

    :param sessionId: uuid unique id for this instance
    :param worker_info: dict Maps worker address to rank & UCX port
    """
    dask_worker = get_worker()
    local_address = dask_worker.address

    eps = [None] * len(worker_info)
    count = 1

    for k in worker_info:
        if str(k) != str(local_address):
            ip, port = parse_host_port(k)
            ep = await ucp.create_endpoint(ip, worker_info[k]["p"])
            eps[worker_info[k]["r"]] = ep
            count += 1

    worker_state(sessionId)["ucp_eps"] = eps

def worker_load(h5pyFileName, localNames, svNames, elements, allSums):
    from dask.distributed import get_worker

    worker = get_worker()
    worker._structures = {}
    worker._true_forces = {}

    with h5py.File(h5pyFileName, 'r') as h5pyFile:
        for struct in localNames:
            worker._structures[struct] = {}
            for sv in svNames:
                worker._structures[struct][sv] = {}
                for elem in elements:
                    worker._structures[struct][sv][elem] = {}

                    group = h5pyFile[struct][sv][elem]
                    energyData = np.array(group['energy'][()], dtype=np.float32)
                    forcesData = np.array(group['forces'][()], dtype=np.float32)

                    if (allSums) and (len(forcesData.shape) == 4):
                        forcesData = forcesData.sum(axis=0)

                    worker._structures[struct][sv][elem]['energy'] = energyData
                    worker._structures[struct][sv][elem]['forces'] = forcesData

            tvF = h5pyFile[struct].attrs['forces']
            worker._true_forces[struct] = np.array(tvF, dtype=np.float32)

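# Preload sketch (assumption, not from the source): Client.run executes
# worker_load once on every worker so each one caches its slice of the HDF5
# data before compute tasks arrive. The file name and keys are hypothetical.
from dask.distributed import Client

client = Client('tcp://scheduler:8786')  # hypothetical
client.run(worker_load, 'structures.h5',
           localNames=['struct0'], svNames=['rho'],
           elements=['Mo'], allSums=False)
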
def get_raft_comm_state(sessionId, state_object=None):
    """
    Retrieves cuML comms state on the scheduler or worker node for the
    given sessionId, creating a new session if it does not exist. If no
    session id is given, returns the state dict for all sessions.

    Parameters
    ----------
    sessionId : str
        Session id to retrieve from the dask scheduler/worker instances
    state_object : object
        Object (either Worker or Scheduler) on which the raft comm
        state will be retrieved (or created)

    Returns
    -------
    session state : dict
        session state associated with sessionId
    """
    state_object = state_object if state_object is not None else get_worker()

    if not hasattr(state_object, "_raft_comm_state"):
        state_object._raft_comm_state = {}

    if (
        sessionId is not None
        and sessionId not in state_object._raft_comm_state
    ):
        state_object._raft_comm_state[sessionId] = {"ts": time.time()}

    if sessionId is not None:
        return state_object._raft_comm_state[sessionId]

    return state_object._raft_comm_state

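# Usage sketch (assumption, not from the source): reading session state from
# inside a task running on a worker. The session id is a placeholder; the
# "wid"/"nworkers" keys are populated by the init helpers in this module.
def my_rank(sessionId='session-1'):
    state = get_raft_comm_state(sessionId=sessionId, state_object=get_worker())
    return state.get("wid"), state.get("nworkers")
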
def _train_part(params, model_factory, list_of_parts, worker_addresses,
                return_model, local_listen_port=12400, time_out=120,
                **kwargs):
    network_params = build_network_params(worker_addresses,
                                          get_worker().address,
                                          local_listen_port, time_out)
    params.update(network_params)

    # Concatenate many parts into one
    parts = tuple(zip(*list_of_parts))
    data = concat(parts[0])
    label = concat(parts[1])
    weight = concat(parts[2]) if len(parts) == 3 else None

    try:
        model = model_factory(**params)
        model.fit(data, label, sample_weight=weight)
    finally:
        _safe_call(_LIB.LGBM_NetworkFree())

    return model if return_model else None

def call_anneal_method(remote_worker,
                       sampler_state,
                       lambdas,
                       noneq_trajectory_filename=None,
                       num_integration_steps=1,
                       return_timer=False,
                       return_sampler_state=False,
                       rethermalize=False,
                       compute_incremental_work=True):
    """
    This function calls LocallyOptimalAnnealing.anneal; since we can only
    map functions with parallelisms (no actors), we need to submit a
    function that calls the LocallyOptimalAnnealing.anneal method.
    """
    if remote_worker == 'remote':
        _class = distributed.get_worker()
    else:
        _class = remote_worker

    (incremental_work, new_sampler_state, timer, _pass,
     endstate_corrections) = _class.annealing_class.anneal(
        sampler_state=sampler_state,
        lambdas=lambdas,
        noneq_trajectory_filename=noneq_trajectory_filename,
        num_integration_steps=num_integration_steps,
        return_timer=return_timer,
        return_sampler_state=return_sampler_state,
        rethermalize=rethermalize,
        compute_incremental_work=compute_incremental_work)

    return (incremental_work, new_sampler_state, timer, _pass,
            endstate_corrections)

def fake_sidecar_fct(
    docker_auth: DockerBasicAuth,
    service_key: str,
    service_version: str,
    input_data: TaskInputData,
    output_data_keys: TaskOutputDataSchema,
    log_file_url: AnyUrl,
    command: List[str],
    expected_annotations: Dict[str, Any],
) -> TaskOutputData:
    sub = Sub(TaskCancelEvent.topic_name())

    # get the task data
    worker = get_worker()
    task = worker.tasks.get(worker.get_current_task())
    assert task is not None
    print(f"--> task {task=} started")
    assert task.annotations == expected_annotations

    # sleep a bit in case someone is aborting us
    print("--> waiting for task to be aborted...")
    for msg in sub:
        assert msg
        print(f"--> received cancellation msg: {msg=}")
        cancel_event = TaskCancelEvent.parse_raw(msg)  # type: ignore
        assert cancel_event
        if cancel_event.job_id == task.key:
            print("--> raising cancellation error now")
            raise asyncio.CancelledError("task cancelled")

    return TaskOutputData.parse_obj({"some_output_key": 123})

def _fit_local(params, model_factory, list_of_parts, worker_addresses,
               return_model, local_listen_port=12400, listen_time_out=120,
               **kwargs):
    network_params = build_network_params(worker_addresses,
                                          get_worker().address,
                                          local_listen_port, listen_time_out)
    params = {**params, **network_params}

    # Prepare data
    if len(list_of_parts[0]) == 3:
        data, labels, weight = zip(*list_of_parts)
        weight = concat(weight)
    else:
        data, labels = zip(*list_of_parts)
        weight = None

    data = concat(data)  # Concatenate many parts into one
    labels = concat(labels)

    try:
        classifier = model_factory(**params)
        classifier.fit(data, labels, sample_weight=weight)
    finally:
        _safe_call(_LIB.LGBM_NetworkFree())

    if return_model:
        return classifier
    else:
        return None

def process(self, inputs):
    '''Load the mortgage performance CSV into a cudf DataFrame.'''
    import cudf

    worker = None
    try:
        from dask.distributed import get_worker
        worker = get_worker()
    except (ValueError, ImportError):
        pass

    logname = convert(self.__class__.__name__)
    logmgr = MortgagePluginsLoggerMgr(worker, logname)
    logger = logmgr.get_logger()

    worker_name = ''
    if worker is not None:
        worker_name = 'WORKER {} '.format(worker.name)

    performance_path = self.conf['csvfile_perfdata']
    logger.info(worker_name + 'LOADING: {}'.format(performance_path))

    cols = list(self.addition.keys())
    dtypes = list(self.addition.values())
    mortgage_gdf = cudf.read_csv(performance_path,
                                 names=cols, dtype=dtypes,
                                 delimiter='|', skiprows=1)

    logmgr.cleanup()
    return mortgage_gdf

async def _func_ucp_create_endpoints(sessionId, worker_info):
    """
    Runs on each worker to create ucp endpoints to all other workers

    Parameters
    ----------
    sessionId : str
        uuid unique id for this instance
    worker_info : dict
        Maps worker addresses to NCCL ranks & UCX ports
    """
    eps = [None] * len(worker_info)
    count = 1

    for k in worker_info:
        ip, port = parse_host_port(k)
        ep = await get_ucx().get_endpoint(ip, worker_info[k]["port"])
        eps[worker_info[k]["rank"]] = ep
        count += 1

    raft_comm_state = get_raft_comm_state(
        sessionId=sessionId, state_object=get_worker()
    )
    raft_comm_state["ucp_eps"] = eps

def _func_store_initial_state(nworkers, sessionId, uniqueId, wid):
    raft_comm_state = get_raft_comm_state(
        sessionId=sessionId, state_object=get_worker()
    )
    raft_comm_state["nccl_uid"] = uniqueId
    raft_comm_state["wid"] = wid
    raft_comm_state["nworkers"] = nworkers

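# Bootstrap sketch (assumption, not from the source): the order these
# helpers imply is (1) store rank/size/uid on every worker, then (2) let
# each worker initialize NCCL. nccl.get_unique_id() follows the raft/cuml
# convention and may differ across versions; the session id is a
# placeholder, and `client` is assumed to be a connected dask Client.
uid = nccl.get_unique_id()
workers = list(client.scheduler_info()['workers'])
for rank, addr in enumerate(workers):
    client.run(_func_store_initial_state, len(workers), 'session-1', uid,
               rank, workers=[addr])
client.run(_func_init_nccl, 'session-1', uid)
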
def _func_build_handle(sessionId, streams_per_handle, verbose):
    """
    Builds a handle_t on the current worker given the initialized comms

    Parameters
    ----------
    sessionId : str
        id to reference state for current comms instance.
    streams_per_handle : int
        number of internal streams to create
    verbose : bool
        print verbose logging output
    """
    worker = get_worker()

    handle = Handle(streams_per_handle)

    raft_comm_state = get_raft_comm_state(
        sessionId=sessionId, state_object=worker
    )

    workerId = raft_comm_state["wid"]
    nWorkers = raft_comm_state["nworkers"]
    nccl_comm = raft_comm_state["nccl"]

    inject_comms_on_handle_coll_only(
        handle, nccl_comm, nWorkers, workerId, verbose
    )

    if verbose:
        worker.log_event(
            topic="info", msg="Finished injecting comms on handle."
        )

    raft_comm_state["handle"] = handle

def wrapped(doc: str, *args, **kwargs):
    worker = get_worker()

    try:
        nlp = worker.nlp
    except AttributeError:
        # Load the model once per worker and cache it on the Worker object.
        nlp = spacy.load(model)
        worker.nlp = nlp

    return func(nlp(doc), *args, **kwargs)

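# Decorator sketch (assumption, not from the source): the closure above
# implies a factory like this, so tasks can run spaCy functions without
# re-loading the model on every call. The model name is hypothetical.
import functools
import spacy
from dask.distributed import get_worker

def with_spacy(model):
    def decorator(func):
        @functools.wraps(func)
        def wrapped(doc, *args, **kwargs):
            worker = get_worker()
            try:
                nlp = worker.nlp
            except AttributeError:
                nlp = spacy.load(model)  # first call on this worker
                worker.nlp = nlp
            return func(nlp(doc), *args, **kwargs)
        return wrapped
    return decorator

@with_spacy('en_core_web_sm')
def count_entities(doc):
    return len(doc.ents)
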
async def _func_init_all(
    sessionId, uniqueId, comms_p2p, worker_info, verbose, streams_per_handle
):
    worker = get_worker()
    raft_comm_state = get_raft_comm_state(
        sessionId=sessionId, state_object=worker
    )
    raft_comm_state["nccl_uid"] = uniqueId
    raft_comm_state["wid"] = worker_info[worker.address]["rank"]
    raft_comm_state["nworkers"] = len(worker_info)

    if verbose:
        worker.log_event(topic="info", msg="Initializing NCCL.")
        start = time.time()

    _func_init_nccl(sessionId, uniqueId)

    if verbose:
        elapsed = time.time() - start
        worker.log_event(
            topic="info", msg=f"NCCL Initialization took: {elapsed} seconds."
        )

    if comms_p2p:
        if verbose:
            worker.log_event(topic="info", msg="Initializing UCX Endpoints")
            start = time.time()

        await _func_ucp_create_endpoints(sessionId, worker_info)

        if verbose:
            elapsed = time.time() - start
            msg = (
                f"Done initializing UCX endpoints. "
                f"Took: {elapsed} seconds.\nBuilding handle."
            )
            worker.log_event(topic="info", msg=msg)

        _func_build_handle_p2p(sessionId, streams_per_handle, verbose)

        if verbose:
            worker.log_event(topic="info", msg="Done building handle.")
    else:
        _func_build_handle(sessionId, streams_per_handle, verbose)