Пример #1
0
    def apply(self,
              data_points: RDD,
              fault_tolerant: bool = False) -> np.ndarray:
        """Label PySpark RDD of data points with LFs.

        Each data point is paired with its position via ``zipWithIndex``,
        the LFs are run on every pair inside a Spark ``map``, and the
        per-row results are collected to the driver and handed to
        ``self._numpy_from_row_data``.

        Parameters
        ----------
        data_points
            PySpark RDD containing data points to be labeled by LFs
        fault_tolerant
            Whether to output ``-1`` when LF execution fails

        Returns
        -------
        np.ndarray
            Matrix of labels emitted by LFs
        """
        f_caller = _FunctionCaller(fault_tolerant)
        # Copy the LFs into a local so the closure captures only what it
        # needs. Referencing ``self._lfs`` inside ``map_fn`` would force
        # Spark to serialize the whole applier instance for every task.
        lfs = self._lfs

        def map_fn(args: Tuple[DataPoint, int]) -> RowData:
            # ``args`` is the (data point, index) pair from zipWithIndex.
            return apply_lfs_to_data_point(*args, lfs=lfs, f_caller=f_caller)

        labels = data_points.zipWithIndex().map(map_fn).collect()
        return self._numpy_from_row_data(labels)
Пример #2
0
    def apply(self, data_points: RDD) -> np.ndarray:
        """Label PySpark RDD of data points with LFs.

        Each data point is paired with its position via ``zipWithIndex``,
        the LFs are run on every pair inside a Spark ``map``, and the
        per-row results are collected to the driver and handed to
        ``self._numpy_from_row_data``.

        Parameters
        ----------
        data_points
            PySpark RDD containing data points to be labeled by LFs

        Returns
        -------
        np.ndarray
            Matrix of labels emitted by LFs
        """
        # Copy the LFs into a local so the closure captures only what it
        # needs. Referencing ``self._lfs`` inside ``map_fn`` would force
        # Spark to serialize the whole applier instance for every task.
        lfs = self._lfs

        def map_fn(args: Tuple[DataPoint, int]) -> RowData:
            # ``args`` is the (data point, index) pair from zipWithIndex.
            return apply_lfs_to_data_point(*args, lfs=lfs)

        labels = data_points.zipWithIndex().map(map_fn).collect()
        return self._numpy_from_row_data(labels)