def get_stream(self, stream):
    """Stream logic; must be implemented.

    Wires the flat_map → key_by → 50 ms time-window → reduce stages onto
    *stream* and returns the resulting windowed stream.
    """
    tokens = stream.flat_map(self.flat_map)
    keyed = tokens.key_by(KeyBy())
    windowed = keyed.time_window(milliseconds(50))
    return windowed.reduce(Reduce())
def main(factory):
    """Run a flat_map/key_by/50 ms-window/reduce job over a generated source
    and write the result to the test sink.
    """
    env = factory.get_execution_environment()
    source = env.create_python_source(Generator(num_iters=1000))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(50))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over an iterator-backed collection source, 10 ms windows,
    results written to the test sink.
    """
    env = flink.get_execution_environment()
    source = env.from_collection(SomeIterator(constants.NUM_ITERATIONS_IN_TEST))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over a python-source generator, 50 ms windows, results
    written to the test sink.
    """
    env = flink.get_execution_environment()
    source = env.create_python_source(
        Generator(num_iters=constants.NUM_ITERATIONS_IN_TEST))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(50))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over a generated number sequence, 10 ms windows, results
    written to the test sink.
    """
    env = flink.get_execution_environment()
    source = env.generate_sequence(1, constants.NUM_ITERATIONS_IN_TEST)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self):
    """Pipeline over an added generator source, 50 ms windows; results are
    printed and the job executes in blocking mode.
    """
    env = self._get_execution_environment()
    source = env.add_source(Generator(num_iters=constants.NUM_ITERATIONS_IN_TEST))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(50))
    windowed.reduce(Sum()).print()
    env.execute(True)
def run(self, flink):
    """Split a generated sequence into two named sub-streams and run the
    same flat_map/key_by/10 ms-window/reduce pipeline on each, writing both
    to the test sink.
    """
    env = flink.get_execution_environment()
    split_window = env.generate_sequence(
        1, constants.NUM_ITERATIONS_IN_TEST).split(StreamSelector())

    def _attach_pipeline(stream):
        # Shared stages for both halves of the split (was duplicated inline).
        stream.flat_map(Tokenizer()) \
            .key_by(Selector()) \
            .time_window(milliseconds(10)) \
            .reduce(Sum()) \
            .output()

    _attach_pipeline(split_window.select('lower_stream'))
    _attach_pipeline(split_window.select('upper_stream'))
    env.execute()
def run(self):
    """Pipeline over an iterator-backed collection source, 10 ms windows;
    prints results and reports the finished job id.
    """
    env = self._get_execution_environment()
    source = env.from_collection(SomeIterator(constants.NUM_ITERATIONS_IN_TEST))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).print()
    result = env.execute("MyJob", True)
    print("Job completed, job_id={}".format(result.jobID))
def run(self, flink):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows,
    results written to the test sink.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ELEMENTS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Two map stages followed by the flat_map/key_by/5 ms-window/reduce
    pipeline over a small constant collection; results go to the test sink.
    """
    env = flink.get_execution_environment()
    source = env.from_collection([3] * 5)
    mapped = source.map(DummyTupple()).map(MinusOne())
    tokens = mapped.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(5))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over alternating (name, value) tuples fed via from_elements,
    10 ms windows, results written to the test sink.
    """
    elements = [
        ("Alice", 111) if idx % 2 == 0 else ("Bob", 2222)
        for idx in range(constants.NUM_ELEMENTS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_elements(*elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows;
    results are written as text, overwriting any previous run's output.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ITERATIONS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).write_as_text(
        "/tmp/flink_write_as_text", WriteMode.OVERWRITE)
    env.execute()
def run(self):
    """Pipeline over an added generator source, 30 ms windows; prints
    results and reports whether execution is local or remote.
    """
    env = PythonStreamExecutionEnvironment.get_execution_environment()
    source = env.add_source(Generator(num_iters=100))
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(30))
    windowed.reduce(Sum()).print()
    print(
        "Execution mode: {}".format("LOCAL" if self._local else "REMOTE"))
    env.execute(self._local)
def run(self):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows;
    prints results and reports the finished job id.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ELEMENTS_IN_TEST)
    ]
    env = self._get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).print()
    result = env.execute("MyJob", True)
    print("Job completed, job_id={}".format(result.jobID))
def run(self):
    """Split a generated sequence into two named sub-streams and run the
    same flat_map/key_by/10 ms-window/reduce pipeline on each; both print
    their results, then the finished job id is reported.
    """
    env = self._get_execution_environment()
    split_window = env.generate_sequence(
        1, constants.NUM_ITERATIONS_IN_TEST).split(StreamSelector())

    def _attach_pipeline(stream):
        # Shared stages for both halves of the split (was duplicated inline).
        stream.flat_map(Tokenizer()) \
            .key_by(Selector()) \
            .time_window(milliseconds(10)) \
            .reduce(Sum()) \
            .print()

    _attach_pipeline(split_window.select('lower_stream'))
    _attach_pipeline(split_window.select('upper_stream'))
    result = env.execute("MyJob", True)
    print("Job completed, job_id={}".format(result.jobID))
def run(self):
    """Two map stages followed by the flat_map/key_by/5 ms-window/reduce
    pipeline over a small constant collection; prints results and reports
    the finished job id.
    """
    env = self._get_execution_environment()
    env.from_collection([3] * 5) \
        .map(DummyTupple()) \
        .map(MinusOne()) \
        .flat_map(Tokenizer()) \
        .key_by(Selector()) \
        .time_window(milliseconds(5)) \
        .reduce(Sum()) \
        .print()
    result = env.execute("MyJob", True)
    # The explicit str() was redundant — format() already stringifies its
    # argument — and inconsistent with the sibling jobs in this file.
    print("Job completed, job_id={}".format(result.jobID))
def run(self, flink):
    """Union three generated python sources and run the shared
    flat_map/key_by/10 ms-window/reduce pipeline over the merged stream.
    """
    env = flink.get_execution_environment()
    hello = env.create_python_source(
        Generator(msg='Hello', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    world = env.create_python_source(
        Generator(msg='World', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    happy = env.create_python_source(
        Generator(msg='Happy', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    merged = hello.union(world, happy)
    tokens = merged.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows,
    results written to the test sink.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ELEMENTS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self, flink):
    """Pipeline reading a temporary text file, 100 ms windows, results
    written to the test sink; the temp file is always removed afterwards.
    """
    tmp_f = generate_tmp_text_file(constants.NUM_ELEMENTS_IN_TEST)
    try:
        env = flink.get_execution_environment()
        source = env.read_text_file(tmp_f.name)
        tokens = source.flat_map(Tokenizer())
        windowed = tokens.key_by(Selector()).time_window(milliseconds(100))
        windowed.reduce(Sum()).output()
        env.execute()
    finally:
        # Clean up the temporary input file even if the job fails.
        tmp_f.close()
        os.unlink(tmp_f.name)
def run(self, flink):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows;
    results are written as text, overwriting any previous run's output.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ITERATIONS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).write_as_text(
        "/tmp/flink_write_as_text", WriteMode.OVERWRITE)
    env.execute()
def run(self):
    """Pipeline over an alternating "aa"/"bbb" collection, 10 ms windows;
    writes results as text (overwrite mode) and reports the finished job id.
    """
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ITERATIONS_IN_TEST)
    ]
    env = self._get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).write_as_text(
        "/tmp/flink_write_as_text", WriteMode.OVERWRITE)
    result = env.execute("MyJob", True)
    print("Job completed, job_id={}".format(result.jobID))
def run(self, flink):
    """Union three generated python sources and run the shared
    flat_map/key_by/10 ms-window/reduce pipeline over the merged stream.
    """
    env = flink.get_execution_environment()
    hello = env.create_python_source(
        Generator(msg='Hello', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    world = env.create_python_source(
        Generator(msg='World', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    happy = env.create_python_source(
        Generator(msg='Happy', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    merged = hello.union(world, happy)
    tokens = merged.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).output()
    env.execute()
def run(self):
    """Union three added generator sources and run the shared
    flat_map/key_by/10 ms-window/reduce pipeline; prints results and
    reports the finished job id.
    """
    env = self._get_execution_environment()
    hello = env.add_source(
        Generator(msg='Hello', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    world = env.add_source(
        Generator(msg='World', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    happy = env.add_source(
        Generator(msg='Happy', num_iters=constants.NUM_ITERATIONS_IN_TEST))
    merged = hello.union(world, happy)
    tokens = merged.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
    windowed.reduce(Sum()).print()
    result = env.execute("My python union stream test", True)
    print("Job completed, job_id={}".format(result.jobID))
def run(self):
    """Consume a Kafka09 topic and run the flat_map/key_by/100 ms-window/
    reduce pipeline; prints results and reports the finished job id.
    """
    params = ParameterTool.fromArgs(sys.argv[1:])
    props = params.getProperties()
    props.setProperty("bootstrap.servers", self._bootstrap_server)
    consumer = PythonFlinkKafkaConsumer09(
        "kafka09-test", StringDeserializationSchema(), props)
    env = self._get_execution_environment()
    stream = env.add_source(consumer)
    tokens = stream.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(100))
    windowed.reduce(Sum()).print()
    result = env.execute("Python consumer kafka09 test", True)
    print("Kafka09 consumer job completed, job_id={}".format(result.jobID))
def run(self, flink):
    """Pipeline over an alternating "aa"/"bbb" collection, 50 ms windows;
    results are written to a local socket served by a background reader.
    """
    port = utils.gen_free_port()
    SocketStringReader('', port, constants.NUM_ITERATIONS_IN_TEST).start()
    time.sleep(0.5)  # give the socket reader a moment to start listening
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ITERATIONS_IN_TEST)
    ]
    env = flink.get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(50))
    windowed.reduce(Sum()).write_to_socket('localhost', port, ToStringSchema())
    env.execute()
def run(self):
    """Pipeline over an alternating "aa"/"bbb" collection, 50 ms windows;
    writes results to a local socket served by a background reader and
    reports the finished job id.
    """
    port = utils.gen_free_port()
    SocketStringReader('', port, constants.NUM_ITERATIONS_IN_TEST).start()
    time.sleep(0.5)  # give the socket reader a moment to start listening
    elements = [
        "aa" if idx % 2 == 0 else "bbb"
        for idx in range(constants.NUM_ITERATIONS_IN_TEST)
    ]
    env = self._get_execution_environment()
    source = env.from_collection(elements)
    tokens = source.flat_map(Tokenizer())
    windowed = tokens.key_by(Selector()).time_window(milliseconds(50))
    windowed.reduce(Sum()).write_to_socket('localhost', port, ToStringSchema())
    result = env.execute("MyJob", True)
    print("Job completed, job_id={}".format(result.jobID))