def word_count():
    """Run a batch word count that calls a Python and a Java function.

    A configuration key and value are read from ``sys.argv[1]`` and
    ``sys.argv[2]`` and applied to the table config. A CSV filesystem sink
    named ``Results`` is declared at ``<system temp dir>/result`` (anything
    already at that path is removed first, best effort). Every word of the
    sample text is paired with ``0``, passed through ``add_one`` and
    ``add_one_java``, grouped by word, and the per-word counts of both
    derived columns are inserted into ``Results``.

    Raises:
        IndexError: if fewer than two command-line arguments are present.
    """
    content = (
        "line Licensed to the Apache Software Foundation ASF under one "
        "line or more contributor license agreements See the NOTICE file "
        "line distributed with this work for additional information "
        "line regarding copyright ownership The ASF licenses this file "
        "to you under the Apache License Version the "
        "License you may not use this file except in compliance "
        "with the License"
    )

    table_env = TableEnvironment.create(EnvironmentSettings.in_batch_mode())

    # The option is supplied by the caller on the command line; it is used
    # to test pipeline.jars and pipeline.classpaths.
    option_key = sys.argv[1]
    option_value = sys.argv[2]
    table_env.get_config().set(option_key, option_value)

    # Start from a clean output location. A failed removal is only logged,
    # the job is still attempted afterwards.
    output_path = tempfile.gettempdir() + '/result'
    if os.path.exists(output_path):
        remove = os.remove if os.path.isfile(output_path) else shutil.rmtree
        try:
            remove(output_path)
        except OSError as err:
            logging.error("Error removing directory: %s - %s.", err.filename, err.strerror)

    logging.info("Results directory: %s", output_path)

    # Declare the Results sink table in the table environment.
    ddl_template = (
        " create table Results( word VARCHAR, `count` BIGINT, `count_java` BIGINT ) "
        "with ( 'connector.type' = 'filesystem', 'format.type' = 'csv', "
        "'connector.path' = '{}' ) "
    )
    table_env.execute_sql(ddl_template.format(output_path))

    # Make both implementations of the function callable by name.
    table_env.execute_sql(
        "create temporary system function add_one as 'add_one.add_one' language python"
    )
    # NOTE(review): register_java_function is marked deprecated in newer
    # PyFlink releases -- confirm whether
    # create_java_temporary_system_function can replace it here.
    table_env.register_java_function(
        "add_one_java", "org.apache.flink.python.tests.util.AddOne")

    rows = [(token, 0) for token in content.split(" ")]
    source = table_env.from_elements(rows, ["word", "count"])

    with_udf_columns = source.select(
        source.word,
        call("add_one", source.count).alias("count"),
        call("add_one_java", source.count).alias("count_java"))
    per_word = with_udf_columns.group_by(source.word)
    counted = per_word.select(
        source.word,
        col("count").count.alias("count"),
        col("count_java").count.alias("count_java"))

    # NOTE(review): the submitted job is not awaited here -- confirm the
    # caller does not rely on the results being complete on return.
    counted.execute_insert("Results")
def word_count():
    """Count the occurrences of each word in a sample text as a batch job.

    A CSV filesystem sink named ``Results`` is declared at
    ``<system temp dir>/result`` (anything already at that path is removed
    first, best effort). The sample text is split on single spaces, each
    word is paired with ``1``, the rows are grouped by word, and one
    ``(word, count)`` row per distinct word is inserted into ``Results``.
    """
    content = (
        "line Licensed to the Apache Software Foundation ASF under one "
        "line or more contributor license agreements See the NOTICE file "
        "line distributed with this work for additional information "
        "line regarding copyright ownership The ASF licenses this file "
        "to you under the Apache License Version the "
        "License you may not use this file except in compliance "
        "with the License"
    )

    table_env = TableEnvironment.create(EnvironmentSettings.in_batch_mode())

    # Start from a clean output location. A failed removal is only logged,
    # the job is still attempted afterwards.
    output_path = tempfile.gettempdir() + '/result'
    if os.path.exists(output_path):
        remove = os.remove if os.path.isfile(output_path) else shutil.rmtree
        try:
            remove(output_path)
        except OSError as err:
            logging.error("Error removing directory: %s - %s.", err.filename, err.strerror)

    logging.info("Results directory: %s", output_path)

    # Declare the Results sink table in the table environment.
    ddl_template = (
        " create table Results( word VARCHAR, `count` BIGINT ) "
        "with ( 'connector.type' = 'filesystem', 'format.type' = 'csv', "
        "'connector.path' = '{}' ) "
    )
    table_env.execute_sql(ddl_template.format(output_path))

    rows = [(token, 1) for token in content.split(" ")]
    words = table_env.from_elements(rows, ["word", "count"])

    per_word = words.group_by(words.word)
    counted = per_word.select(words.word, expr.lit(1).count.alias('count'))

    # NOTE(review): the submitted job is not awaited here -- confirm the
    # caller does not rely on the results being complete on return.
    counted.execute_insert("Results")
def test_mode_selection(self):
    """Check which execution mode each way of building settings reports.

    Covers the builder's default, the builder's explicit streaming and
    batch selections, and the ``EnvironmentSettings.in_streaming_mode`` /
    ``EnvironmentSettings.in_batch_mode`` shortcuts. The same builder
    instance is reused throughout, so the order of the steps matters.
    """
    builder = EnvironmentSettings.new_instance()

    # Default behaviour: must stay consistent with the python doc.
    default_settings = builder.build()
    self.assertTrue(default_settings.is_streaming_mode())

    # Streaming mode, selected through the builder ...
    streaming_builder = builder.in_streaming_mode()
    built_streaming = streaming_builder.build()
    self.assertTrue(built_streaming.is_streaming_mode())

    # ... and through the shortcut on EnvironmentSettings.
    shortcut_streaming = EnvironmentSettings.in_streaming_mode()
    self.assertTrue(shortcut_streaming.is_streaming_mode())

    # Batch mode, selected through the builder ...
    batch_builder = builder.in_batch_mode()
    built_batch = batch_builder.build()
    self.assertFalse(built_batch.is_streaming_mode())

    # ... and through the shortcut on EnvironmentSettings.
    shortcut_batch = EnvironmentSettings.in_batch_mode()
    self.assertFalse(shortcut_batch.is_streaming_mode())