def _build_host_dict(gpu_futures, client):
    """
    Build a dictionary of hosts and their corresponding ports from workers
    which have the given gpu_futures.

    Parameters
    ----------
    gpu_futures
        Futures to locate; passed unchanged to ``client.who_has``.
    client
        Object exposing ``who_has`` (presumably a dask.distributed client;
        confirm against callers).

    Returns
    -------
    dict
        Maps each host to the ``set`` of ports seen for that host.

    Notes
    -----
    Only the first worker listed for each future is considered, so every
    entry returned by ``who_has`` must list at least one worker.
    """
    who_has = client.who_has(gpu_futures)

    # De-duplicate the parsed (host, port) pairs before grouping them, taking
    # only the first worker reported for each future.
    endpoints = {parse_host_port(holders[0]) for holders in who_has.values()}

    hosts_dict = {}
    for host, port in endpoints:
        hosts_dict.setdefault(host, set()).add(port)
    return hosts_dict
def drop_duplicates(ddf):
    """
    Submit ``_drop_duplicates`` for every partition of ``ddf``, pinning each
    task to a worker that already holds that partition.

    Parameters
    ----------
    ddf
        Either a ``dd.DataFrame``, whose delayed partitions are computed and
        waited on here, or an iterable of objects with a ``key`` attribute
        that is used as-is (presumably already-computed futures; confirm
        against callers).

    Returns
    -------
    list
        The results of ``client.submit``, one per entry reported by
        ``client.who_has``; they have been waited on before returning.
    """
    client = default_client()

    if isinstance(ddf, dd.DataFrame):
        parts = client.compute(ddf.to_delayed())
        wait(parts)
    else:
        parts = ddf

    # who_has is keyed by the stringified key, so index the parts the same way.
    parts_by_key = {str(part.key): part for part in parts}

    # Pair each partition with the first worker reported as holding it.
    placements = [
        (parse_host_port(first(holders)), parts_by_key[key])
        for key, holders in client.who_has(parts).items()
    ]

    gpu_data = [
        client.submit(_drop_duplicates, part, workers=[worker])
        for worker, part in placements
    ]
    wait(gpu_data)
    return gpu_data
def _get_mg_info(ddf):
    """
    Get gpu data pointers of columns of each dataframe partition.

    ``to_gpu_array`` is submitted once per partition, pinned to a worker that
    already holds that partition.

    Parameters
    ----------
    ddf
        Either a ``dd.DataFrame``, whose delayed partitions are computed and
        waited on here, or an iterable of objects with a ``key`` attribute
        that is used as-is (presumably already-computed futures; confirm
        against callers).

    Returns
    -------
    list of tuple
        ``(worker, result)`` pairs, where ``worker`` is the value returned by
        ``parse_host_port`` and ``result`` is what ``client.submit`` returned
        for that partition. The list has been waited on before returning.
    """
    client = default_client()

    if isinstance(ddf, dd.DataFrame):
        parts = client.compute(ddf.to_delayed())
        wait(parts)
    else:
        parts = ddf

    # who_has is keyed by the stringified key, so index the parts the same way.
    part_lookup = {str(part.key): part for part in parts}

    # Resolve, for every partition, the first worker reported as holding it.
    located = [
        (parse_host_port(first(owners)), part_lookup[key])
        for key, owners in client.who_has(parts).items()
    ]

    # Keep the worker next to its submitted task so callers know where the
    # resulting data lives.
    gpu_data = [
        (worker, client.submit(to_gpu_array, part, workers=[worker]))
        for worker, part in located
    ]
    wait(gpu_data)
    return gpu_data