def fit_transform(self, X: BlockArray, y: BlockArray = None):
    """Run the actor's ``fit_transform`` on ``X`` (and ``y`` when provided).

    Args:
        X: Input BlockArray. It is passed through ``_check_array`` and only
            the first entry of ``X.flattened_oids()`` is sent to the actor.
        y: Optional BlockArray handled the same way as ``X``. When omitted,
            ``None`` is forwarded to the actor in its place.

    Returns:
        A BlockArray built from the object id returned by the actor call,
        declared with ``X.shape`` and dtype ``float``.
    """
    _check_array(X, True)

    # Resolve the optional second operand up front so the actor call below
    # has a single shape regardless of whether ``y`` was supplied.
    y_oid = None
    if y is not None:
        _check_array(y, True)
        y_oid = y.flattened_oids()[0]

    cm = instance().cm
    r_oid = cm.call_actor_method(
        self.actor,
        "fit_transform",
        X.flattened_oids()[0],
        y_oid,
    )
    return BlockArray.from_oid(
        r_oid,
        shape=X.shape,
        dtype=float,
        cm=instance().cm,
    )
def median(self, a: BlockArray, axis=None, out=None, keepdims=False):
    """Return the median of a 1D BlockArray.

    The median is obtained with ``self.quickselect`` over the array's
    flattened object ids: one selection for odd sizes, and the mean of the
    two middle selections for even sizes.

    Args:
        a: A BlockArray with at most one dimension.
        axis: Not supported; must be ``None``.
        out: Not supported; must be ``None``.
        keepdims: Not supported; must be falsy.

    Returns:
        The median value, as produced by ``self.quickselect`` (odd size) or
        by averaging two ``self.quickselect`` results (even size).

    Raises:
        NotImplementedError: If ``axis``, ``out`` or ``keepdims`` is used,
            or if ``a`` has more than one dimension.
    """
    if axis is not None:
        raise NotImplementedError("'axis' argument is not yet supported.")
    if out is not None:
        raise NotImplementedError("'out' argument is not yet supported.")
    if keepdims:
        raise NotImplementedError(
            "'keepdims' argument is not yet supported.")
    if a.ndim > 1:
        raise NotImplementedError(
            "Only 1D BlockArrays are current supported.")

    oids = a.flattened_oids()
    upper_rank = a.size // 2

    # Odd number of elements: the middle rank is the median.
    if a.size % 2 == 1:
        return self.quickselect(oids, upper_rank)

    # Even number of elements: average the two middle ranks.
    lower_mid = self.quickselect(oids, upper_rank - 1)
    upper_mid = self.quickselect(oids, upper_rank)
    return (lower_mid + upper_mid) / 2
def predict(self, X: BlockArray):
    """Run the actor's ``predict`` on ``X``.

    Args:
        X: Input BlockArray. It is passed through ``_check_array`` and only
            the first entry of ``X.flattened_oids()`` is sent to the actor.

    Returns:
        A 1D BlockArray of length ``X.shape[0]`` built from the object id
        returned by the actor, with dtype ``predict_dtype`` (a name taken
        from the enclosing scope).
    """
    _check_array(X, True)

    cm = instance().cm
    x_oid = X.flattened_oids()[0]
    r_oid = cm.call_actor_method(self.actor, "predict", x_oid)

    # One prediction per row of X.
    out_shape = (X.shape[0], )
    return BlockArray.from_oid(
        r_oid,
        shape=out_shape,
        dtype=predict_dtype,
        cm=instance().cm,
    )
def score(self, X: BlockArray, y: BlockArray, sample_weight: BlockArray = None):
    """Run the actor's ``score`` on ``X`` and ``y``.

    Args:
        X: Input BlockArray. It is passed through ``_check_array`` and only
            the first entry of ``X.flattened_oids()`` is sent to the actor.
        y: Target BlockArray, handled the same way as ``X``.
        sample_weight: Optional BlockArray handled the same way. When
            omitted, ``None`` is forwarded to the actor in its place.

    Returns:
        A BlockArray with shape ``()`` and dtype ``float`` built from the
        object id returned by the actor.
    """
    _check_array(X, True)
    _check_array(y, True)

    weight_oid = None
    if sample_weight is not None:
        _check_array(sample_weight, True)
        weight_oid = sample_weight.flattened_oids()[0]

    cm = instance().cm
    x_oid = X.flattened_oids()[0]
    y_oid = y.flattened_oids()[0]
    r_oid = cm.call_actor_method(self.actor, "score", x_oid, y_oid, weight_oid)

    return BlockArray.from_oid(r_oid, shape=(), dtype=float, cm=instance().cm)
def quantile(
    self, arr: BlockArray, q: float, interpolation="linear", method="tdigest"
) -> BlockArray:
    """Compute the q-th quantile of the array elements.

    One ``tdigest_chunk`` task is submitted per block of ``arr`` and the
    results are combined by a single ``percentiles_from_tdigest`` task.
    NOTE(review): the result is presumably approximate, since it is derived
    from t-digests rather than a full sort -- confirm against crick's docs.

    Args:
        arr: A 1D BlockArray.
        q: Quantile to compute, which must be between 0 and 1 inclusive.
        interpolation: Interpolation method to use when the desired
            quantile lies between two data points i < j. Only ``"linear"``
            is supported.
            Also see
            https://numpy.org/doc/1.20/reference/generated/numpy.quantile.html
            and
            https://docs.dask.org/en/latest/_modules/dask/array/percentile.html.
        method: Algorithm used to compute the quantile. Only ``"tdigest"``
            is supported.

    Returns:
        A BlockArray of shape ``(1,)`` and dtype ``np.float64`` holding the
        q-th quantile of the array elements.

    Raises:
        ImportError: If the ``crick`` package cannot be imported.
        NotImplementedError: If ``arr`` is not 1D, or if ``interpolation``
            or ``method`` is anything other than the supported value.
        ValueError: If ``q`` is outside ``[0, 1]`` (including NaN).
    """
    # crick is an optional dependency; it is imported here only to fail
    # early with an actionable message before any work is scheduled.
    # pylint: disable = import-outside-toplevel, unused-import
    try:
        import crick
    except Exception as e:
        raise ImportError(
            "Unable to import crick. "
            "Install crick with command 'pip install cython; pip install crick'"
        ) from e

    if arr.ndim != 1:
        raise NotImplementedError("Only 1D 'arr' is currently supported.")
    # Written as a negated chained comparison so that NaN is rejected too.
    if not 0.0 <= q <= 1.0:
        raise ValueError("Quantiles must be in the range [0, 1]")
    # Explicit raises instead of ``assert``: assertions are stripped when
    # Python runs with -O, which would silently accept unsupported options.
    if interpolation != "linear":
        raise NotImplementedError(
            "Only interpolation='linear' is currently supported."
        )
    if method != "tdigest":
        raise NotImplementedError("Only method='tdigest' is currently supported.")

    arr_oids = arr.flattened_oids()
    num_arrs = len(arr_oids)

    t_oids = []
    for i, arr_oid in enumerate(arr_oids):
        syskwargs = {
            "grid_entry": (i,),
            "grid_shape": (num_arrs,),
            "options": {"num_returns": 1},
        }
        t_oids.append(self.cm.tdigest_chunk(arr_oid, syskwargs=syskwargs))

    # The combine step deliberately reuses the syskwargs of the last block,
    # exactly as before.
    p_oid = self.cm.percentiles_from_tdigest([q], *t_oids, syskwargs=syskwargs)
    return BlockArray.from_oid(p_oid, (1,), np.float64, self.cm)
def top_k(self, arr: BlockArray, k: int, largest=True) -> Tuple[BlockArray, BlockArray]:
    """Find the `k` largest or smallest elements of a BlockArray.

    The k-th ranked value is located with ``self.quickselect``. The result
    is made of every position strictly beyond that value, followed by as
    many positions equal to it as are needed to reach `k` entries. If
    several elements tie with the k-th value, no guarantee is made as to
    which of them are included.

    Args:
        arr: A 1D BlockArray.
        k: Number of top elements to return; between 1 and ``arr.size``.
        largest: Whether to return largest or smallest elements.

    Returns:
        A tuple of two BlockArrays, (`values`, `indices`).
            indices: Positions of the selected elements -- strictly
                larger/smaller ones first, then ties with the k-th value.
            values: ``arr[indices]``.

    Raises:
        NotImplementedError: If ``arr`` is not 1D.
        IndexError: If ``k`` is less than 1 or greater than ``arr.size``.
    """
    if arr.ndim != 1:
        raise NotImplementedError("Only 1D 'arr' is currently supported.")
    if k <= 0 or arr.size < k:
        raise IndexError(
            "'k' must be at least 1 and at most the size of 'arr'.")

    arr_oids = arr.flattened_oids()

    # Rank handed to quickselect: k - 1 for the largest elements, -k for the
    # smallest ones.
    kth = k - 1 if largest else -k
    k_oid = self.quickselect(arr_oids, kth)
    k_val = BlockArray.from_oid(k_oid, (1, ), arr.dtype, self.cm)

    # Positions strictly beyond the k-th value are always part of the result.
    if largest:
        strict_mask = arr > k_val[0]
    else:
        strict_mask = arr < k_val[0]
    ie_indices = self.where(strict_mask)[0]

    # Ties with the k-th value fill whatever room is left up to k entries.
    tie_indices = self.where(arr == k_val[0])[0]
    tie_fill = tie_indices[:k - ie_indices.size]

    block_size = self.compute_block_shape((k, ), int)[0]
    indices = self.concatenate([ie_indices, tie_fill], 0, block_size)
    return arr[indices], indices
def median(self, arr: BlockArray) -> BlockArray:
    """Compute the median of a 1D BlockArray.

    Uses ``self.quickselect`` on the array's flattened object ids: a single
    selection for odd sizes, and the mean of the two middle selections for
    even sizes.

    Args:
        arr: A 1D BlockArray.

    Returns:
        The median value as a BlockArray. For odd sizes this is a shape
        ``(1,)`` BlockArray with ``arr.dtype``; for even sizes it is the
        result of ``(m0 + m1) / 2`` on two such BlockArrays.

    Raises:
        NotImplementedError: If ``arr`` is not 1D.
    """
    if arr.ndim != 1:
        raise NotImplementedError("Only 1D 'arr' is currently supported.")

    oids = arr.flattened_oids()
    half = arr.size // 2

    def select(kth):
        # Wrap the object id picked by quickselect in a one-element BlockArray.
        picked_oid = self.quickselect(oids, kth)
        return BlockArray.from_oid(picked_oid, (1, ), arr.dtype, self.cm)

    if arr.size % 2 == 1:
        return select(half)

    lower_mid = select(half - 1)
    upper_mid = select(half)
    return (lower_mid + upper_mid) / 2
def fit(self, X: BlockArray, y: BlockArray):
    """Run the actor's ``fit`` on ``X`` and ``y``.

    Args:
        X: Input BlockArray. It is passed through ``_check_array`` and only
            the first entry of ``X.flattened_oids()`` is sent to the actor.
        y: Target BlockArray, handled the same way as ``X``.

    Returns:
        None. The value returned by the actor call is discarded.
    """
    _check_array(X, True)
    _check_array(y, True)

    cm = instance().cm
    x_oid = X.flattened_oids()[0]
    y_oid = y.flattened_oids()[0]
    cm.call_actor_method(self.actor, "fit", x_oid, y_oid)