Пример #1
0
 def fit_transform(self, X: BlockArray, y: BlockArray = None):
     """Run the actor's "fit_transform" method and wrap its result.

     Both inputs are passed through ``_check_array`` before use. Only the
     first entry of each array's ``flattened_oids()`` is sent to the actor.

     Args:
         X: BlockArray forwarded to the actor.
         y: Optional BlockArray forwarded to the actor; ``None`` is passed
             through unchanged when it is omitted.

     Returns:
         A BlockArray built from the object id returned by the actor call,
         declared with the shape of ``X`` and dtype ``float``.
     """
     _check_array(X, True)
     y_oid = None
     if y is not None:
         _check_array(y, True)
         y_oid = y.flattened_oids()[0]
     call_actor = instance().cm.call_actor_method
     x_oid = X.flattened_oids()[0]
     result_oid = call_actor(self.actor, "fit_transform", x_oid, y_oid)
     return BlockArray.from_oid(
         result_oid,
         shape=X.shape,
         dtype=float,
         cm=instance().cm,
     )
Пример #2
0
    def median(self, a: BlockArray, axis=None, out=None, keepdims=False):
        """Return the median of a one-dimensional BlockArray.

        The middle position(s) of ``a`` are looked up with
        ``self.quickselect`` on the flattened object ids of ``a``.

        Args:
            a: A BlockArray; only one-dimensional inputs are accepted.
            axis: Not supported; must be left as ``None``.
            out: Not supported; must be left as ``None``.
            keepdims: Not supported; must be falsy.

        Returns:
            The ``self.quickselect`` result for the middle position when
            ``a.size`` is odd, otherwise half the sum of the results for
            the two middle positions.

        Raises:
            NotImplementedError: If ``axis``, ``out`` or ``keepdims`` is
                given, or if ``a`` has more than one dimension.
        """
        # Reject the numpy-style options that have no implementation here.
        unsupported_options = (
            (axis is not None, "'axis' argument is not yet supported."),
            (out is not None, "'out' argument is not yet supported."),
            (keepdims, "'keepdims' argument is not yet supported."),
        )
        for was_given, message in unsupported_options:
            if was_given:
                raise NotImplementedError(message)
        if a.ndim > 1:
            raise NotImplementedError(
                "Only 1D BlockArrays are current supported.")

        oids = a.flattened_oids()
        middle = a.size // 2
        if a.size % 2 == 0:
            # Even size: average the two elements around the middle.
            lower = self.quickselect(oids, middle - 1)
            upper = self.quickselect(oids, middle)
            return (lower + upper) / 2
        return self.quickselect(oids, middle)
Пример #3
0
 def predict(self, X: BlockArray):
     """Run the actor's "predict" method on ``X`` and wrap its result.

     ``X`` is passed through ``_check_array`` first. Only the first entry
     of ``X.flattened_oids()`` is sent to the actor.

     Args:
         X: BlockArray forwarded to the actor.

     Returns:
         A 1D BlockArray built from the object id returned by the actor
         call, declared with length ``X.shape[0]`` and dtype
         ``predict_dtype``.
     """
     _check_array(X, True)
     call_actor = instance().cm.call_actor_method
     prediction_oid = call_actor(self.actor, "predict",
                                 X.flattened_oids()[0])
     num_rows = X.shape[0]
     return BlockArray.from_oid(
         prediction_oid,
         shape=(num_rows, ),
         dtype=predict_dtype,
         cm=instance().cm,
     )
Пример #4
0
 def score(self,
           X: BlockArray,
           y: BlockArray,
           sample_weight: BlockArray = None):
     """Run the actor's "score" method and wrap its result.

     Every provided array is passed through ``_check_array`` first. Only
     the first entry of each array's ``flattened_oids()`` is sent to the
     actor.

     Args:
         X: BlockArray forwarded to the actor.
         y: BlockArray forwarded to the actor.
         sample_weight: Optional BlockArray forwarded to the actor;
             ``None`` is passed through unchanged when it is omitted.

     Returns:
         A BlockArray built from the object id returned by the actor call,
         declared with shape ``()`` and dtype ``float``.
     """
     _check_array(X, True)
     _check_array(y, True)
     weight_oid = None
     if sample_weight is not None:
         _check_array(sample_weight, True)
         weight_oid = sample_weight.flattened_oids()[0]
     call_actor = instance().cm.call_actor_method
     x_oid = X.flattened_oids()[0]
     y_oid = y.flattened_oids()[0]
     score_oid = call_actor(self.actor, "score", x_oid, y_oid, weight_oid)
     return BlockArray.from_oid(
         score_oid,
         shape=(),
         dtype=float,
         cm=instance().cm,
     )
Пример #5
0
    def quantile(
        self, arr: BlockArray, q: float, interpolation="linear", method="tdigest"
    ) -> BlockArray:
        """Compute the q-th quantile of the elements of a 1D BlockArray.

        One digest is built per block of ``arr`` with
        ``self.cm.tdigest_chunk``; all digests are then handed to
        ``self.cm.percentiles_from_tdigest`` together with ``[q]``.

        Args:
            arr: BlockArray; only one-dimensional inputs are accepted.
            q: Quantile to compute, which must be between 0 and 1 inclusive.
            interpolation: Interpolation method to use when the desired
                quantile lies between two data points i < j. Only
                ``"linear"`` is accepted.
            method: Estimation method. Only ``"tdigest"`` is accepted.

        See also:
            https://numpy.org/doc/1.20/reference/generated/numpy.quantile.html
            https://docs.dask.org/en/latest/_modules/dask/array/percentile.html

        Returns:
            A BlockArray of shape ``(1,)`` and dtype ``np.float64`` built
            from the object id returned by
            ``self.cm.percentiles_from_tdigest``.

        Raises:
            Exception: If the ``crick`` package cannot be imported.
            NotImplementedError: If ``arr`` is not one-dimensional.
            ValueError: If ``q`` lies outside ``[0, 1]``.
            AssertionError: If ``interpolation`` is not ``"linear"`` or
                ``method`` is not ``"tdigest"``.
        """
        # Fail early, with installation instructions, if crick is missing.
        # pylint: disable = import-outside-toplevel, unused-import
        try:
            import crick
        except Exception as e:
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation into the message.
            raise Exception(
                "Unable to import crick. "
                "Install crick with command 'pip install cython; pip install crick'"
            ) from e

        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")
        if q < 0.0 or q > 1.0:
            raise ValueError("Quantiles must be in the range [0, 1]")
        # Raised explicitly (rather than with `assert`) so the checks are not
        # stripped under `python -O`; the exception type is kept unchanged.
        if interpolation != "linear":
            raise AssertionError(
                "Only interpolation='linear' is currently supported.")
        if method != "tdigest":
            raise AssertionError("Only method='tdigest' is currently supported.")

        arr_oids = arr.flattened_oids()
        num_arrs = len(arr_oids)
        quantiles = [q]
        t_oids = []

        for i, arr_oid in enumerate(arr_oids):
            syskwargs = {
                "grid_entry": (i,),
                "grid_shape": (num_arrs,),
                "options": {"num_returns": 1},
            }
            t_oids.append(self.cm.tdigest_chunk(arr_oid, syskwargs=syskwargs))

        # NOTE: the combine step reuses the syskwargs of the last block
        # processed above, exactly as before.
        p_oid = self.cm.percentiles_from_tdigest(
            quantiles, *t_oids, syskwargs=syskwargs
        )
        return BlockArray.from_oid(p_oid, (1,), np.float64, self.cm)
Пример #6
0
    def top_k(self,
              arr: BlockArray,
              k: int,
              largest=True) -> Tuple[BlockArray, BlockArray]:
        """Select `k` extreme elements (largest or smallest) of a 1D BlockArray.

        A threshold element is obtained from ``self.quickselect`` (position
        ``k - 1`` when `largest` is set, ``-k`` otherwise). Indices of
        elements strictly beyond the threshold are taken first, followed by
        as many indices of elements equal to the threshold as are needed to
        reach `k`. When several elements tie with the threshold, no
        guarantee is made as to which of them are included.

        Args:
            arr: A BlockArray; only one-dimensional inputs are accepted.
            k: Number of elements to return.
            largest: Select the largest elements when truthy, the smallest
                otherwise.

        Returns:
            A tuple ``(values, indices)`` of BlockArrays, where ``indices``
            is the concatenation described above and ``values`` is
            ``arr[indices]``.

        Raises:
            NotImplementedError: If ``arr`` is not one-dimensional.
            IndexError: If ``k`` is smaller than 1 or larger than ``arr.size``.
        """
        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")
        if k <= 0 or k > arr.size:
            raise IndexError(
                "'k' must be at least 1 and at most the size of 'arr'.")

        oids = arr.flattened_oids()
        threshold_position = k - 1 if largest else -k
        threshold_oid = self.quickselect(oids, threshold_position)
        threshold = BlockArray.from_oid(threshold_oid, (1, ), arr.dtype,
                                        self.cm)

        # Elements strictly beyond the threshold always belong to the result.
        if largest:
            beyond_mask = arr > threshold[0]
        else:
            beyond_mask = arr < threshold[0]
        beyond_indices = self.where(beyond_mask)[0]

        # Fill the remaining slots with elements equal to the threshold.
        tied_indices = self.where(arr == threshold[0])[0]
        remaining = k - beyond_indices.size
        tied_fill = tied_indices[:remaining]

        block_size = self.compute_block_shape((k, ), int)[0]
        selected = self.concatenate([beyond_indices, tied_fill], 0,
                                    block_size)
        return arr[selected], selected
Пример #7
0
    def median(self, arr: BlockArray) -> BlockArray:
        """Return the median of a one-dimensional BlockArray.

        The middle position(s) of ``arr`` are looked up with
        ``self.quickselect`` on the flattened object ids of ``arr``, and
        each result is wrapped in a BlockArray of shape ``(1,)``.

        Args:
            arr: A BlockArray; only one-dimensional inputs are accepted.

        Returns:
            The wrapped middle element when ``arr.size`` is odd, otherwise
            the sum of the two wrapped middle elements divided by 2.

        Raises:
            NotImplementedError: If ``arr`` is not one-dimensional.
        """
        if arr.ndim != 1:
            raise NotImplementedError("Only 1D 'arr' is currently supported.")

        oids = arr.flattened_oids()

        def select(position):
            # Wrap the object id returned by quickselect in a BlockArray.
            selected_oid = self.quickselect(oids, position)
            return BlockArray.from_oid(selected_oid, (1, ), arr.dtype,
                                       self.cm)

        middle = arr.size // 2
        if arr.size % 2 == 1:
            return select(middle)
        # Even size: average the two elements around the middle.
        return (select(middle - 1) + select(middle)) / 2
Пример #8
0
 def fit(self, X: BlockArray, y: BlockArray):
     """Run the actor's "fit" method on ``X`` and ``y``.

     Both inputs are passed through ``_check_array`` first. Only the first
     entry of each array's ``flattened_oids()`` is sent to the actor. The
     result of the actor call is discarded and nothing is returned.

     Args:
         X: BlockArray forwarded to the actor.
         y: BlockArray forwarded to the actor.
     """
     for array in (X, y):
         _check_array(array, True)
     call_actor = instance().cm.call_actor_method
     x_oid = X.flattened_oids()[0]
     y_oid = y.flattened_oids()[0]
     call_actor(self.actor, "fit", x_oid, y_oid)