from dask.distributed import Client


class MyDaskClient:
    """Thin wrapper around dask.distributed.Client for key/worker lookups."""

    def __init__(self, address=None):
        self._client = Client(address)

    def _who_has(self, key):
        # who_has() maps each key to the list of workers holding its data
        who_has_dict = self._client.who_has()
        if key in who_has_dict:
            return {"key": key, "worker": who_has_dict[key]}
        return None

    def get_status(self, key):
        # First check whether a worker is currently processing the task
        processing_dict = self._client.processing()
        for worker, keys in processing_dict.items():
            if key in keys:
                return {"status": "running", "worker": worker}
        # Then look for the task in the completed task stream
        for task in reversed(self._client.get_task_stream()):
            if task["key"] == key:
                return {"status": "done", "dask_status": task["status"]}
        return None
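# A minimal usage sketch for the wrapper above (not part of the original code):
# it assumes a scheduler is reachable at the given address; the address and the
# submitted task are placeholders.
dask_client = MyDaskClient("tcp://127.0.0.1:8786")    # hypothetical scheduler address
future = dask_client._client.submit(sum, [1, 2, 3])   # hypothetical task
print(dask_client.get_status(future.key))   # "running" while a worker processes it, "done" once finished
print(dask_client._who_has(future.key))     # worker(s) holding the finished result, if any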
def main():
    # print('XGBOOST_BUILD_DOC is ' + os.environ['XGBOOST_BUILD_DOC'])
    parser = argparse.ArgumentParser("rapidssample")
    parser.add_argument("--data_dir", type=str, help="location of data")
    parser.add_argument("--num_gpu", type=int, help="Number of GPUs to use", default=1)
    parser.add_argument("--part_count", type=int, help="Number of data files to train against", default=2)
    parser.add_argument("--end_year", type=int, help="Year to end the data load", default=2000)
    parser.add_argument("--cpu_predictor", type=str, help="Flag to use CPU for prediction", default='False')
    parser.add_argument('-f', type=str, default='')  # added for notebook execution scenarios
    args = parser.parse_args()

    data_dir = args.data_dir
    num_gpu = args.num_gpu
    part_count = args.part_count
    end_year = args.end_year
    cpu_predictor = args.cpu_predictor.lower() in ('yes', 'true', 't', 'y', '1')

    if cpu_predictor:
        print('Training with CPUs requires num_gpu = 1')
        num_gpu = 1

    print('data_dir = {0}'.format(data_dir))
    print('num_gpu = {0}'.format(num_gpu))
    print('part_count = {0}'.format(part_count))
    # part_count = part_count + 1  # adding one because the usage below is not inclusive
    print('end_year = {0}'.format(end_year))
    print('cpu_predictor = {0}'.format(cpu_predictor))

    # Determine this node's IP address for the local CUDA cluster
    import subprocess
    cmd = "hostname --all-ip-addresses"
    process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
    output, error = process.communicate()
    IPADDR = str(output.decode()).split()[0]

    cluster = LocalCUDACluster(ip=IPADDR, n_workers=num_gpu)
    client = Client(cluster)
    print(client.ncores())

    # To download data for this notebook, visit
    # https://rapidsai.github.io/demos/datasets/mortgage-data and update the following paths accordingly.
    acq_data_path = "{0}/acq".format(data_dir)          # "/rapids/data/mortgage/acq"
    perf_data_path = "{0}/perf".format(data_dir)        # "/rapids/data/mortgage/perf"
    col_names_path = "{0}/names.csv".format(data_dir)   # "/rapids/data/mortgage/names.csv"
    start_year = 2000
    # end_year = 2000  # end_year is inclusive -- converted to parameter
    # part_count = 2   # the number of data files to train against -- converted to parameter

    # Initialize the RMM memory pool on every worker before the ETL
    client.run(initialize_rmm_pool)
    print(client.ncores())

    # NOTE: The ETL calculates additional features which are then dropped before creating the
    # XGBoost DMatrix. This can be optimized to avoid calculating the dropped features.
print("Reading ...") t1 = datetime.datetime.now() gpu_dfs = [] gpu_time = 0 quarter = 1 year = start_year count = 0 while year <= end_year: for file in glob(os.path.join(perf_data_path + "/Performance_" + str(year) + "Q" + str(quarter) + "*")): if count < part_count: gpu_dfs.append(process_quarter_gpu(client, col_names_path, acq_data_path, year=year, quarter=quarter, perf_file=file)) count += 1 print('file: {0}'.format(file)) print('count: {0}'.format(count)) quarter += 1 if quarter == 5: year += 1 quarter = 1 wait(gpu_dfs) t2 = datetime.datetime.now() print("Reading time ...") print(t2-t1) print('len(gpu_dfs) is {0}'.format(len(gpu_dfs))) client.run(cudf._gdf.rmm_finalize) client.run(initialize_rmm_no_pool) client print(client.ncores()) dxgb_gpu_params = { 'nround': 100, 'max_depth': 8, 'max_leaves': 2**8, 'alpha': 0.9, 'eta': 0.1, 'gamma': 0.1, 'learning_rate': 0.1, 'subsample': 1, 'reg_lambda': 1, 'scale_pos_weight': 2, 'min_child_weight': 30, 'tree_method': 'gpu_hist', 'n_gpus': 1, 'distributed_dask': True, 'loss': 'ls', 'objective': 'gpu:reg:linear', 'max_features': 'auto', 'criterion': 'friedman_mse', 'grow_policy': 'lossguide', 'verbose': True } if cpu_predictor: print('Training using CPUs') dxgb_gpu_params['predictor'] = 'cpu_predictor' dxgb_gpu_params['tree_method'] = 'hist' dxgb_gpu_params['objective'] = 'reg:linear' else: print('Training using GPUs') print('Training parameters are {0}'.format(dxgb_gpu_params)) gpu_dfs = [delayed(DataFrame.from_arrow)(gpu_df) for gpu_df in gpu_dfs[:part_count]] gpu_dfs = [gpu_df for gpu_df in gpu_dfs] wait(gpu_dfs) tmp_map = [(gpu_df, list(client.who_has(gpu_df).values())[0]) for gpu_df in gpu_dfs] new_map = {} for key, value in tmp_map: if value not in new_map: new_map[value] = [key] else: new_map[value].append(key) del(tmp_map) gpu_dfs = [] for list_delayed in new_map.values(): gpu_dfs.append(delayed(cudf.concat)(list_delayed)) del(new_map) gpu_dfs = [(gpu_df[['delinquency_12']], gpu_df[delayed(list)(gpu_df.columns.difference(['delinquency_12']))]) for gpu_df in gpu_dfs] gpu_dfs = [(gpu_df[0].persist(), gpu_df[1].persist()) for gpu_df in gpu_dfs] gpu_dfs = [dask.delayed(xgb.DMatrix)(gpu_df[1], gpu_df[0]) for gpu_df in gpu_dfs] gpu_dfs = [gpu_df.persist() for gpu_df in gpu_dfs] gc.collect() wait(gpu_dfs) labels = None t1 = datetime.datetime.now() bst = dxgb_gpu.train(client, dxgb_gpu_params, gpu_dfs, labels, num_boost_round=dxgb_gpu_params['nround']) t2 = datetime.datetime.now() print("Training time ...") print(t2-t1) print('str(bst) is {0}'.format(str(bst))) print('Exiting script')
# Clear any per-worker cache so the first pass is a genuinely "cold" run
c.gather(c.map(lambda x: get_worker().array_cache.clear(), workers, workers=workers))

# start
c.get_task_stream()
# print(get_mll_hist(chunks[0]))

# Cold run: let the scheduler place the chunks wherever it likes
t0 = time.time()
futures = c.map(get_mll_hist, chunks)
results = c.gather(futures)
t1 = time.time()
print(len(results), "results")
print(t1 - t0)

task_stream = c.get_task_stream(start=t0, stop=t1)
print("task_stream length", len(task_stream))
pd.DataFrame(task_stream).drop("type", axis=1).to_json("data/dask_cold_{}.json".format(trial))

# Record which worker ended up holding each future's result
d = c.who_has(futures)
# chunk_workers = list(zip(chunks, [d[f.key] for f in futures]))
workers = [d[f.key][0] for f in futures]
print(workers)

c.get_task_stream()

# Warm run: pin each chunk to the worker that already processed it
t0 = time.time()
# pure=False to avoid caching of the *results*
futures = [c.submit(get_mll_hist, chunk, pure=False, workers=worker, allow_other_workers=True)
           for chunk, worker in zip(chunks, workers)]
# futures = c.map(get_mll_hist, chunks, workers=workers, pure=False)
results = c.gather(futures)
t1 = time.time()
print(len(results), "results")
print(t1 - t0)

task_stream = c.get_task_stream(start=t0, stop=t1)
print("task_stream length", len(task_stream))
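# Hedged follow-up sketch: the fragment above captures the warm-run task stream but stops
# before persisting it. Mirroring the cold-run line, one might write it out as below; the
# "dask_warm" filename is an assumption, not part of the original code.
pd.DataFrame(task_stream).drop("type", axis=1).to_json("data/dask_warm_{}.json".format(trial))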