def build_source(streaming_context, func):
    """Build a StreamSource from a source function.

    The function is serialized and handed to the gateway client of
    `streaming_context`, which creates the underlying source stream.

    Args:
        streaming_context: Stream context whose `_gateway_client` is used
            to create the source stream.
        func: An instance of `SourceFunction`.

    Returns:
        A StreamSource wrapping the newly created stream.
    """
    # Resolve the gateway method before serializing so that the order of
    # attribute access and serialization matches a single chained call.
    create_source = streaming_context._gateway_client.create_py_stream_source
    source_stream = create_source(function.serialize(func))
    return StreamSource(source_stream, streaming_context, func)
def sink(self, func):
    """Create a StreamSink that consumes this stream with `func`.

    Args:
        func: The sink function. If `func` is not an instance of
            SinkFunction, it is wrapped as SimpleSinkFunction.

    Returns:
        A StreamSink.
    """
    if isinstance(func, function.SinkFunction):
        sink_func = func
    else:
        sink_func = function.SimpleSinkFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "sink" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(sink_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "sink", func_handle)
    return StreamSink(self, stream_handle, sink_func)
def key_by(self, func):
    """Partition this data stream by key.

    Creates a new :class:`KeyDataStream` that uses the provided key
    function to partition the data stream.

    Args:
        func: The KeyFunction used to extract the partitioning key.
            If `func` is not an instance of KeyFunction (for example a
            plain python function), it is wrapped as SimpleKeyFunction.

    Returns:
        A KeyDataStream.
    """
    if isinstance(func, function.KeyFunction):
        key_func = func
    else:
        key_func = function.SimpleKeyFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "keyBy" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(key_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "keyBy", func_handle)
    return KeyDataStream(self, stream_handle)
def map(self, func):
    """Apply a Map transformation on a :class:`DataStream`.

    The transformation calls a
    :class:`ray.streaming.function.MapFunction` for each element of the
    DataStream.

    Args:
        func: The MapFunction that is called for each element of the
            DataStream. If `func` is not an instance of MapFunction (for
            example a plain python function), it is wrapped as
            SimpleMapFunction.

    Returns:
        A new data stream transformed by the MapFunction.
    """
    if isinstance(func, function.MapFunction):
        map_func = func
    else:
        map_func = function.SimpleMapFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "map" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(map_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "map", func_handle)
    return DataStream(self, stream_handle)
def filter(self, func):
    """Apply a Filter transformation on a :class:`DataStream`.

    The transformation calls a
    :class:`ray.streaming.function.FilterFunction` for each element of
    the DataStream and retains only those elements for which the function
    returns True.

    Args:
        func: The FilterFunction that is called for each element of the
            DataStream. If `func` is not an instance of FilterFunction
            (for example a plain python function), it is wrapped as
            SimpleFilterFunction.

    Returns:
        The filtered DataStream.
    """
    if isinstance(func, function.FilterFunction):
        filter_func = func
    else:
        filter_func = function.SimpleFilterFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "filter" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(filter_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "filter", func_handle)
    return DataStream(self, stream_handle)
def flat_map(self, func):
    """Apply a FlatMap transformation on a :class:`DataStream`.

    The transformation calls a
    :class:`ray.streaming.function.FlatMapFunction` for each element of
    the DataStream. Each FlatMapFunction call can return any number of
    elements, including none.

    Args:
        func: The FlatMapFunction that is called for each element of the
            DataStream. If `func` is not an instance of FlatMapFunction
            (for example a plain python function), it is wrapped as
            SimpleFlatMapFunction.

    Returns:
        The transformed DataStream.
    """
    if isinstance(func, function.FlatMapFunction):
        flat_map_func = func
    else:
        flat_map_func = function.SimpleFlatMapFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "flatMap" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(flat_map_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "flatMap", func_handle)
    return DataStream(self, stream_handle)
def reduce(self, func):
    """Apply a reduce transformation on the keyed data stream.

    The stream is grouped by the given key function, and the
    :class:`ray.streaming.function.ReduceFunction` receives input values
    based on the key value. Only input values with the same key go to
    the same reducer.

    Args:
        func: The ReduceFunction that will be called for every element
            of the input values with the same key. If `func` is not an
            instance of ReduceFunction (for example a plain python
            function), it is wrapped as SimpleReduceFunction.

    Returns:
        A transformed DataStream.
    """
    if isinstance(func, function.ReduceFunction):
        reduce_func = func
    else:
        reduce_func = function.SimpleReduceFunction(func)

    # Register the serialized function with the gateway first, then apply
    # the "reduce" operation to the underlying stream.
    create_py_func = self._gateway_client().create_py_func
    func_handle = create_py_func(function.serialize(reduce_func))
    call_method = self._gateway_client().call_method
    stream_handle = call_method(self._j_stream, "reduce", func_handle)
    return DataStream(self, stream_handle)