def window_join(self, other_stream, join_attribute, window_width):
    """Joins the stream with another stream over a window.

    NOTE(review): ``other_stream``, ``join_attribute``, and
    ``window_width`` are currently unused -- the operator is registered
    without them. TODO: confirm whether they should be forwarded to the
    WindowJoin operator.
    """
    join_operator = Operator(
        self.env.gen_operator_id(),
        OpType.WindowJoin,
        processor.WindowJoin,
        "WindowJoin",
        num_instances=self.env.config.parallelism)
    return self.__register(join_operator)
def sink(self):
    """Closes the stream with a sink operator."""
    sink_operator = Operator(
        self.env.gen_operator_id(),
        OpType.Sink,
        processor.Sink,
        "Sink",
        num_instances=self.env.config.parallelism)
    return self.__register(sink_operator)
def read_text_file(self, filepath):
    """Creates a source stream that reads records from a text file.

    Attributes:
        filepath (str): The path of the text file to read.
    """
    new_id = self.gen_operator_id()
    # The filepath travels to the operator via the generic 'other' slot.
    file_operator = Operator(
        new_id,
        OpType.ReadTextFile,
        processor.ReadTextFile,
        "Read Text File",
        other=filepath)
    self.operators[new_id] = file_operator
    return DataStream(self, new_id)
def source(self, source):
    """Creates a source stream driven by a user-defined source.

    Attributes:
        source: The user-defined source logic for the operator.
    """
    new_id = self.gen_operator_id()
    source_operator = Operator(
        new_id,
        OpType.Source,
        processor.Source,
        "Source",
        logic=source)
    self.operators[new_id] = source_operator
    return DataStream(self, new_id)
def inspect(self, inspect_logic):
    """Inspects the content of the stream.

    Attributes:
        inspect_logic (function): The user-defined inspect function.
    """
    inspect_operator = Operator(
        self.env.gen_operator_id(),
        OpType.Inspect,
        processor.Inspect,
        "Inspect",
        inspect_logic,
        num_instances=self.env.config.parallelism)
    return self.__register(inspect_operator)
def filter(self, filter_fn):
    """Applies a filter to the stream.

    Attributes:
        filter_fn (function): The user-defined filter function.
    """
    filter_operator = Operator(
        self.env.gen_operator_id(),
        OpType.Filter,
        processor.Filter,
        "Filter",
        filter_fn,
        num_instances=self.env.config.parallelism)
    return self.__register(filter_operator)
def map(self, map_fn, name="Map"):
    """Applies a map operator to the stream.

    Attributes:
        map_fn (function): The user-defined logic of the map.
        name (str): A human-readable name for the operator.
    """
    map_operator = Operator(
        self.env.gen_operator_id(),
        OpType.Map,
        processor.Map,
        name,
        map_fn,
        num_instances=self.env.config.parallelism)
    return self.__register(map_operator)
def reduce(self, reduce_fn):
    """Applies a rolling reduce operator to the stream.

    Attributes:
        reduce_fn (function): The user-defined reduce function.
    """
    # The operator name was previously "Sum" (copy-paste from sum());
    # use "Reduce" so the display name reflects the actual operator type.
    op = Operator(
        self.env.gen_operator_id(),
        OpType.Reduce,
        processor.Reduce,
        "Reduce",
        reduce_fn,
        num_instances=self.env.config.parallelism)
    return self.__register(op)
def key_by(self, key_selector):
    """Applies a key_by operator to the stream.

    Attributes:
        key_selector: Selects the key to partition the stream on
            (e.g. an attribute index for tuple records).
    """
    # The key selector travels to the operator via the generic 'other' slot.
    key_by_operator = Operator(
        self.env.gen_operator_id(),
        OpType.KeyBy,
        processor.KeyBy,
        "KeyBy",
        other=key_selector,
        num_instances=self.env.config.parallelism)
    return self.__register(key_by_operator)
def flat_map(self, flatmap_fn):
    """Applies a flatmap operator to the stream.

    Attributes:
        flatmap_fn (function): The user-defined logic of the flatmap
            (e.g. split()).
    """
    flatmap_operator = Operator(
        self.env.gen_operator_id(),
        OpType.FlatMap,
        processor.FlatMap,
        "FlatMap",
        flatmap_fn,
        num_instances=self.env.config.parallelism)
    return self.__register(flatmap_operator)
def sum(self, attribute_selector, state_keeper=None):
    """Applies a rolling sum operator to the stream.

    Attributes:
        attribute_selector: Selects the attribute to sum from each
            record (e.g. an attribute index for tuple records).
        state_keeper: Optional actor handle used to keep the sum state
            -- TODO confirm expected interface.
    """
    # Implemented as a Reduce operator whose logic is the _sum helper;
    # the attribute selector rides in the generic 'other' slot.
    sum_operator = Operator(
        self.env.gen_operator_id(),
        OpType.Sum,
        processor.Reduce,
        "Sum",
        _sum,
        other=attribute_selector,
        state_actor=state_keeper,
        num_instances=self.env.config.parallelism)
    return self.__register(sum_operator)