def apply(self, data_points: RDD, fault_tolerant: bool = False) -> np.ndarray:
    """Run every labeling function (LF) over a PySpark RDD of data points.

    Each data point is paired with its index via ``zipWithIndex``, the LFs
    are applied to each pair inside a Spark ``map``, and the collected
    per-row results are converted to a label matrix.

    Parameters
    ----------
    data_points
        PySpark RDD containing the data points to be labeled by the LFs
    fault_tolerant
        If True, output ``-1`` when LF execution fails

    Returns
    -------
    np.ndarray
        Matrix of labels emitted by the LFs
    """
    # Built before any Spark call so the same caller object is captured by
    # the closure shipped to the workers.
    lf_caller = _FunctionCaller(fault_tolerant)

    def label_row(indexed_point: Tuple[DataPoint, int]) -> RowData:
        # ``indexed_point`` is the (data point, index) pair from zipWithIndex.
        return apply_lfs_to_data_point(
            *indexed_point, lfs=self._lfs, f_caller=lf_caller
        )

    indexed_points = data_points.zipWithIndex()
    row_data = indexed_points.map(label_row).collect()
    return self._numpy_from_row_data(row_data)
def apply(self, data_points: RDD) -> np.ndarray:
    """Run every labeling function (LF) over a PySpark RDD of data points.

    Each data point is paired with its index via ``zipWithIndex``, the LFs
    are applied to each pair inside a Spark ``map``, and the collected
    per-row results are converted to a label matrix.

    Parameters
    ----------
    data_points
        PySpark RDD containing the data points to be labeled by the LFs

    Returns
    -------
    np.ndarray
        Matrix of labels emitted by the LFs
    """

    def label_row(indexed_point: Tuple[DataPoint, int]) -> RowData:
        # ``indexed_point`` is the (data point, index) pair from zipWithIndex.
        return apply_lfs_to_data_point(*indexed_point, lfs=self._lfs)

    indexed_points = data_points.zipWithIndex()
    row_data = indexed_points.map(label_row).collect()
    return self._numpy_from_row_data(row_data)