Пример #1
0
def _build_host_dict(gpu_futures, client):
    """
    Group the ports of the workers holding ``gpu_futures`` by host.

    ``client.who_has(gpu_futures)`` is queried and, for every entry, only
    the first worker listed is taken into account. Each of those worker
    addresses is split with ``parse_host_port`` into a ``(host, port)``
    pair.

    Returns a dict mapping each host to the set of ports seen on it.
    """
    placement = client.who_has(gpu_futures)

    # Only the first worker reported for each future is considered.
    first_workers = [holders[0] for holders in placement.values()]

    # De-duplicate the (host, port) pairs before grouping them by host.
    endpoints = {parse_host_port(address) for address in first_workers}

    ports_by_host = {}
    for host, port in endpoints:
        ports_by_host.setdefault(host, set()).add(port)

    return ports_by_host
Пример #2
0
def drop_duplicates(ddf):
    """
    Run ``_drop_duplicates`` on every partition, on the worker holding it.

    When ``ddf`` is a ``dd.DataFrame`` its delayed partitions are computed
    through the default client and waited on; any other input is used
    as-is as the collection of partitions (each element must expose a
    ``key`` attribute).

    Returns the list of futures produced by ``client.submit``, after
    waiting on all of them. There is one future per entry reported by
    ``client.who_has``.
    """
    client = default_client()

    if not isinstance(ddf, dd.DataFrame):
        parts = ddf
    else:
        parts = client.compute(ddf.to_delayed())
        wait(parts)

    # who_has is looked up by key below, so index the partitions by the
    # string form of their key.
    part_by_key = {str(part.key): part for part in parts}

    # Pair each partition with the first worker reported as holding it.
    placement = client.who_has(parts)
    assignments = [
        (parse_host_port(first(holders)), part_by_key[key])
        for key, holders in placement.items()
    ]

    # Pin each task to the worker that already owns the partition.
    gpu_data = []
    for worker, part in assignments:
        future = client.submit(_drop_duplicates, part, workers=[worker])
        gpu_data.append(future)

    wait(gpu_data)
    return gpu_data
Пример #3
0
def _get_mg_info(ddf):
    """
    Run ``to_gpu_array`` on every partition, on the worker holding it.

    When ``ddf`` is a ``dd.DataFrame`` its delayed partitions are computed
    through the default client and waited on; any other input is used
    as-is as the collection of partitions (each element must expose a
    ``key`` attribute).

    Returns a list of ``(worker, future)`` tuples, where ``worker`` is the
    result of ``parse_host_port`` for the first worker holding the
    partition and ``future`` comes from ``client.submit``. The list is
    waited on before being returned.

    NOTE(review): the original comment says this gets the GPU data
    pointers of the columns of each partition; that depends on
    ``to_gpu_array``, which is not visible here -- confirm.
    """
    client = default_client()

    if not isinstance(ddf, dd.DataFrame):
        parts = ddf
    else:
        parts = client.compute(ddf.to_delayed())
        wait(parts)

    # who_has is looked up by key below, so index the partitions by the
    # string form of their key.
    part_by_key = {str(part.key): part for part in parts}

    # Pair each partition with the first worker reported as holding it.
    placement = client.who_has(parts)
    assignments = [
        (parse_host_port(first(holders)), part_by_key[key])
        for key, holders in placement.items()
    ]

    # Pin each task to the worker that already owns the partition and keep
    # the worker alongside the resulting future.
    gpu_data = []
    for worker, part in assignments:
        future = client.submit(to_gpu_array, part, workers=[worker])
        gpu_data.append((worker, future))

    wait(gpu_data)
    return gpu_data