def _get_mg_info(self, ddf):
    """
    Given a Dask cuDF, extract the number of dimensions and convert
    the pieces of the Dask cuDF into Numba arrays, which can be
    passed into the kNN algorithm.
    :param ddf: Dask cuDF to extract parts from
    :return: tuple of (worker, future) pairs and the number of columns
    """
    client = default_client()

    if isinstance(ddf, dd.DataFrame):
        cols = len(ddf.columns)
        parts = ddf.to_delayed()
        parts = client.compute(parts)
        yield wait(parts)
    else:
        raise Exception("Input should be a Dask DataFrame")

    # A dict in the form of {part_key: part}
    key_to_part_dict = dict([(str(part.key), part) for part in parts])
    who_has = yield client.who_has(parts)

    worker_map = []
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        worker_map.append((worker, key_to_part_dict[key]))

    gpu_data = [(worker, client.submit(to_gpu_matrix, part,
                                       workers=[worker]))
                for worker, part in worker_map]

    yield wait(gpu_data)

    raise gen.Return((gpu_data, cols))
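# Hedged usage sketch (not part of the original module): Tornado-style
# coroutines such as _get_mg_info() are typically driven to completion
# through the client's event loop. `model` and `ddf` below are
# hypothetical placeholders for an estimator instance and a Dask cuDF.
def _example_drive_get_mg_info(model, ddf):
    client = default_client()
    # Client.sync() runs the coroutine on the event loop and returns
    # the value carried by gen.Return.
    gpu_data, cols = client.sync(model._get_mg_info, ddf)
    return gpu_data, cols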
def get_endpoints(addr_ports):
    # Create UCX endpoints from this worker to all other workers
    ucx = get_worker()._ucx
    for address, port in addr_ports:
        if address != get_worker().address:
            # Only the host is needed here; the port returned by
            # parse_host_port() is the Dask port, not the UCX port.
            host, _ = parse_host_port(address)
            ucx.get_endpoint(host, port)
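# Hedged sketch of how the (address, port) pairs might be assembled and
# broadcast so every worker opens endpoints to its peers. The mapping of
# worker address to UCX port (`ucx_ports`) is an assumption here, not the
# original wiring.
def _example_broadcast_endpoints(client, ucx_ports):
    # scheduler_info() lists the connected workers by address
    addr_ports = [(addr, ucx_ports[addr])
                  for addr in client.scheduler_info()["workers"]]
    # client.run() executes get_endpoints() once on every worker
    client.run(get_endpoints, addr_ports)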
def hosts_to_parts(futures):
    """
    Builds an ordered dict mapping each host to its list of parts.
    :param futures: list of (worker, part) tuples
    :return: OrderedDict mapping (host, port) tuples to lists of parts
    """
    w_to_p_map = OrderedDict()
    for w, p in futures:
        host, port = parse_host_port(w)
        host_key = (host, port)
        if host_key not in w_to_p_map:
            w_to_p_map[host_key] = []
        w_to_p_map[host_key].append(p)
    return w_to_p_map
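# Illustration of hosts_to_parts() with made-up worker addresses and part
# placeholders, assuming parse_host_port() turns "host:port" into a
# (host, int(port)) tuple:
#
#   >>> futures = [("10.0.0.1:8788", "part0"),
#   ...            ("10.0.0.1:8788", "part1"),
#   ...            ("10.0.0.2:8788", "part2")]
#   >>> hosts_to_parts(futures)
#   OrderedDict([(('10.0.0.1', 8788), ['part0', 'part1']),
#                (('10.0.0.2', 8788), ['part2'])])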
def _build_host_dict(self, gpu_futures, client):
    who_has = client.who_has(gpu_futures)

    key_to_host_dict = {}
    for key in who_has:
        key_to_host_dict[key] = parse_host_port(who_has[key][0])

    hosts_to_key_dict = {}
    for key, host in key_to_host_dict.items():
        if host not in hosts_to_key_dict:
            hosts_to_key_dict[host] = set([key])
        else:
            hosts_to_key_dict[host].add(key)

    workers = [key[0] for key in list(who_has.values())]

    return build_host_dict(workers)
def _build_host_dict(gpu_futures, client):
    """
    Helper function to build a dictionary mapping hosts to the workers
    that currently hold the parts of the given futures.
    :param gpu_futures: futures whose current locations are queried
    :param client: Dask client used to look up part locations
    :return:
    """
    who_has = client.who_has(gpu_futures)

    key_to_host_dict = {}
    for key in who_has:
        key_to_host_dict[key] = parse_host_port(who_has[key][0])

    hosts_to_key_dict = {}
    for key, host in key_to_host_dict.items():
        if host not in hosts_to_key_dict:
            hosts_to_key_dict[host] = set([key])
        else:
            hosts_to_key_dict[host].add(key)

    workers = [key[0] for key in list(who_has.values())]

    return build_host_dict(workers)
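# Hedged sketch of the grouping build_host_dict() is assumed to perform
# (its real implementation lives elsewhere in the package): collapse a
# list of worker addresses into a dict of host -> set of ports.
def _example_build_host_dict(workers):
    host_dict = {}
    for address in workers:
        host, port = parse_host_port(address)
        host_dict.setdefault(host, set()).add(port)
    return host_dict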
def _do_fit(self, X_df, y_df, dtype):
    client = default_client()

    # Find the locations of the parts of y_df so the columns of X_df
    # can be distributed to the same workers
    loc_dict = {}
    yield wait(y_df)
    tt = yield client.who_has(y_df)
    location = tuple(tt.values())
    for i in range(X_df.npartitions):
        part_number = eval(list(tt.keys())[i])[1]
        loc_dict[part_number] = parse_host_port(str(location[i])[:-3])

    # Let's divide the columns evenly, matching the order of the labels
    part_size = ceil(X_df.shape[1] / X_df.npartitions)

    # We scatter delayed operations to gather columns on the workers
    scattered = []
    coefs = []
    for i in range(X_df.npartitions):
        up_limit = min((i + 1) * part_size, X_df.shape[1])
        cols = X_df.columns.values[i * part_size:up_limit]
        loc_cudf = X_df[cols]
        yield wait(loc_cudf)
        scattered.append(client.submit(preprocess_on_worker,
                                       loc_cudf,
                                       workers=[loc_dict[i]]))
        yield wait(scattered)
        coefs.append(client.submit(dev_array_on_worker,
                                   up_limit - i * part_size,
                                   dtype=dtype,
                                   unique=np.random.randint(0, 1e6),
                                   workers=[loc_dict[i]]))
        yield wait(coefs)
        del loc_cudf

    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = scattered
    label_parts = y_df.to_delayed()
    coef_parts = coefs

    # Arrange parts into pairs. This enforces co-locality
    parts = list(map(delayed, zip(data_parts, label_parts, coef_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # A dict in the form of {part_key: part}
    key_to_part_dict = dict([(str(part.key), part) for part in parts])
    who_has = yield client.who_has(parts)

    worker_parts = {}
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        if worker not in worker_parts:
            worker_parts[worker] = []
        worker_parts[worker].append(key_to_part_dict[key])

    """
    Create IPC handles on each worker hosting input data
    """
    # input_devarrays is a list of (worker, future) pairs
    input_devarrays = [(worker, client.submit(fit_to_device_arrays,
                                              part,
                                              workers=[worker]))
                       for worker, part in worker_parts.items()]
    yield wait(input_devarrays)

    """
    Gather IPC handles for each worker and call _fit() on the worker
    designated to run the computation.
    """
    # The last worker is the only one that can have fewer items.
    exec_node = loc_dict[X_df.npartitions - 1]

    # Need to fetch parts on worker
    on_worker = list(filter(lambda x: x[0] == exec_node, input_devarrays))
    not_on_worker = list(
        filter(lambda x: x[0] != exec_node, input_devarrays))

    ipc_handles = [
        client.submit(get_input_ipc_handles, future, workers=[a_worker])
        for a_worker, future in not_on_worker
    ]

    raw_arrays = [future for a_worker, future in on_worker]

    # IPC handles are loaded in separate threads on the worker so they
    # can be used to make calls through Cython.
    # Calls _fit_on_worker, defined at the bottom of this file.
    intercept = client.submit(_fit_on_worker, (ipc_handles, raw_arrays),
                              self._build_params_map(),
                              workers=[exec_node])

    yield wait(intercept)

    coef_series = [
        client.submit(coef_on_worker, coefs[i], i,
                      X_df.shape[1], X_df.npartitions, loc_dict[i],
                      workers=[loc_dict[i]])
        for i in range(len(loc_dict))
    ]

    raise gen.Return((coef_series, intercept, loc_dict))
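# Self-contained illustration of the column-partitioning arithmetic used
# by _do_fit() (and _do_predict() below): ncols columns are split into
# nparts nearly even slices, and only the last slice may be smaller.
def _example_column_slices(ncols, nparts):
    from math import ceil
    part_size = ceil(ncols / nparts)
    return [(i * part_size, min((i + 1) * part_size, ncols))
            for i in range(nparts)]

# _example_column_slices(10, 3) == [(0, 4), (4, 8), (8, 10)]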
def _do_predict(self, X_df, coefs, loc_dict, intercept, dtype):
    client = default_client()
    part_size = ceil(X_df.shape[1] / X_df.npartitions)

    # We scatter delayed operations to gather columns on the workers
    scattered = []
    for i in range(X_df.npartitions):
        up_limit = min((i + 1) * part_size, X_df.shape[1])
        cols = X_df.columns.values[i * part_size:up_limit]
        loc_cudf = X_df[cols]
        yield wait(loc_cudf)
        scattered.append(client.submit(preprocess_predict,
                                       loc_cudf,
                                       workers=[loc_dict[i]]))
        yield wait(scattered)
        del loc_cudf

    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = scattered
    coef_parts = coefs.to_delayed()

    # Arrange parts into pairs. This enforces co-locality
    parts = list(map(delayed, zip(data_parts, coef_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # A dict in the form of {part_key: part}
    key_to_part_dict = dict([(str(part.key), part) for part in parts])
    who_has = yield client.who_has(parts)

    worker_parts = {}
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        if worker not in worker_parts:
            worker_parts[worker] = []
        worker_parts[worker].append(key_to_part_dict[key])

    """
    Create IPC handles on each worker hosting input data
    """
    # input_devarrays is a list of (worker, future) pairs
    input_devarrays = [(worker, client.submit(predict_to_device_arrays,
                                              part, worker, loc_dict,
                                              X_df.npartitions,
                                              dtype=dtype,
                                              workers=[worker]))
                       for worker, part in worker_parts.items()]
    yield wait(input_devarrays)

    """
    Gather IPC handles for each worker and call _predict() on the worker
    designated to run the computation.
    """
    exec_node = loc_dict[X_df.npartitions - 1]

    # Need to fetch parts on worker
    on_worker = list(filter(lambda x: x[0] == exec_node, input_devarrays))
    not_on_worker = list(
        filter(lambda x: x[0] != exec_node, input_devarrays))

    ipc_handles = [
        client.submit(get_input_ipc_handles, future,
                      unique=np.random.randint(0, 1e6),
                      workers=[a_worker])
        for a_worker, future in not_on_worker
    ]

    raw_arrays = [future for a_worker, future in on_worker]

    # IPC handles are loaded in separate threads on the worker so they
    # can be used to make calls through Cython.
    # Calls _predict_on_worker, defined at the bottom of this file.
    ret = client.submit(_predict_on_worker, (ipc_handles, raw_arrays),
                        self.intercept, self._build_params_map(),
                        workers=[exec_node])
    yield wait(ret)

    dfs = [
        client.submit(series_on_worker, f, worker, loc_dict,
                      X_df.npartitions, X_df, workers=[worker])
        for worker, f in input_devarrays
    ]

    return dfs
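# Hedged sketch: the per-worker series futures returned by _do_predict()
# could be collected into a single distributed collection. Whether the
# caller does exactly this is an assumption; dd.from_delayed() itself is
# standard Dask, and `dd` and `delayed` are the module's existing imports.
def _example_collect_predictions(dfs):
    # Wrap each future in a delayed object and concatenate lazily
    return dd.from_delayed([delayed(f) for f in dfs])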
def _kneighbors(self, X, k):
    """
    Internal function to query the kNN model.
    :param X: Dask dataframe of query points
    :param k: number of nearest neighbors to return
    :return:
    """
    client = default_client()
    if k is None:
        k = self.n_neighbors

    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = X.to_delayed()

    parts = list(map(delayed, data_parts))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # A dict in the form of {part_key: part}
    key_to_part_dict = dict([(str(part.key), part) for part in parts])
    who_has = yield client.who_has(parts)

    worker_parts = {}
    for key, workers in who_has.items():
        worker = parse_host_port(first(workers))
        if worker not in worker_parts:
            worker_parts[worker] = []
        worker_parts[worker].append(key_to_part_dict[key])

    """
    Create IPC handles on each worker hosting input data
    """
    # input_devarrays is a list of (worker, future) pairs
    input_devarrays = [(worker, client.submit(input_to_device_arrays,
                                              part, {"k": k},
                                              workers=[worker]))
                       for worker, part in worker_parts.items()]
    yield wait(input_devarrays)

    """
    Gather IPC handles for each worker and run the query on the worker
    hosting the trained model.
    """
    exec_node, model = self.model

    # Need to fetch parts on worker
    on_worker = list(filter(lambda x: x[0] == exec_node, input_devarrays))
    not_on_worker = list(
        filter(lambda x: x[0] != exec_node, input_devarrays))

    ipc_handles = [
        client.submit(get_input_ipc_handles, future, workers=[a_worker])
        for a_worker, future in not_on_worker
    ]

    raw_arrays = [future for a_worker, future in on_worker]

    # IPC handles are loaded in separate threads on the worker so they
    # can be used to make calls through Cython.
    run = client.submit(_kneighbors_on_worker, (ipc_handles, raw_arrays),
                        model, {"k": k}, workers=[exec_node])
    yield wait(run)

    dfs = [
        client.submit(build_dask_dfs, f, {"k": k}, workers=[worker])
        for worker, f in input_devarrays
    ]
    yield wait(dfs)

    raise gen.Return(dfs)
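# Hedged sketch of a synchronous wrapper over the _kneighbors() coroutine,
# mirroring a scikit-learn style kneighbors() entry point. `model` is a
# hypothetical fitted estimator exposing _kneighbors().
def _example_kneighbors(model, X, k=None):
    client = default_client()
    # Drive the coroutine on the event loop; returns the list of
    # per-worker futures carried by gen.Return.
    return client.sync(model._kneighbors, X, k)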