def test_execute_insert_to_table_descriptor(self):
    """Insert a datagen table into a blackhole sink described by a descriptor.

    Source and sink are both declared inline through ``TableDescriptor``
    objects that share one schema with a single STRING column ``f0``. The
    source sets the ``number-of-rows`` option to ``'10'``.
    """
    string_schema = Schema.new_builder().column("f0", DataTypes.STRING()).build()

    source_descriptor = (
        TableDescriptor.for_connector("datagen")
        .option("number-of-rows", '10')
        .schema(string_schema)
        .build()
    )
    source_table = self.t_env.from_descriptor(source_descriptor)

    sink_descriptor = (
        TableDescriptor.for_connector("blackhole")
        .schema(string_schema)
        .build()
    )
    insert_result = source_table.execute_insert(sink_descriptor)
    insert_result.collect()
def test_stream_case(self):
    """Run the streaming shell example and verify the CSV file it produces.

    Two rows are created with ``st_env.from_elements``, column ``a`` is
    incremented by one, and the result is inserted into a ``filesystem``
    table registered as ``stream_sink`` that writes CSV with ``,`` as the
    field delimiter. The first entry listed in the sink path is then read
    back and compared with the expected two lines.

    NOTE(review): the section after ``# example begin`` is presumably kept in
    sync with an example in shell.py (see the marker comments) -- confirm
    before editing it.
    """
    from pyflink.shell import s_env, st_env, DataTypes
    from pyflink.table.schema import Schema
    from pyflink.table.table_descriptor import TableDescriptor, FormatDescriptor
    # example begin
    import tempfile
    import os
    import shutil
    sink_path = tempfile.gettempdir() + '/streaming.csv'
    # Remove whatever already exists at the sink path, file or directory.
    if os.path.exists(sink_path):
        if os.path.isfile(sink_path):
            os.remove(sink_path)
        else:
            shutil.rmtree(sink_path)
    s_env.set_parallelism(1)
    t = st_env.from_elements([(1, 'hi', 'hello'), (2, 'hi', 'hello')], ['a', 'b', 'c'])

    # Register the CSV filesystem sink with columns a BIGINT, b STRING, c STRING.
    st_env.create_temporary_table(
        "stream_sink",
        TableDescriptor.for_connector("filesystem").schema(
            Schema.new_builder().column("a", DataTypes.BIGINT()).column(
                "b", DataTypes.STRING()).column(
                "c", DataTypes.STRING()).build()).option(
            "path", sink_path).format(
            FormatDescriptor.for_format("csv").option(
                "field-delimiter", ",").build()).build())

    # wait() blocks on the insert before the output is read back below.
    t.select(t.a + 1, t.b, t.c).execute_insert("stream_sink").wait()

    # verify code, do not copy these code to shell.py
    # sink_path is listed as a directory here; only its first entry is read.
    with open(os.path.join(sink_path, os.listdir(sink_path)[0]), 'r') as f:
        lines = f.read()
        self.assertEqual(lines, '2,hi,hello\n' + '3,hi,hello\n')
def test_create_temporary_table_from_descriptor(self):
    """Register a temporary table from a descriptor and inspect what was stored.

    The current catalog must not report the table as existing, while the
    lookup through the Java catalog manager must return a table carrying the
    descriptor's schema and options.
    """
    from pyflink.table.schema import Schema

    env = self.t_env
    current_catalog = env.get_current_catalog()
    current_database = env.get_current_database()
    int_schema = Schema.new_builder().column("f0", DataTypes.INT()).build()

    descriptor = (
        TableDescriptor.for_connector("fake")
        .schema(int_schema)
        .option("a", "Test")
        .build()
    )
    env.create_temporary_table("T", descriptor)

    # The catalog itself must not list the temporary table.
    table_path = ObjectPath(current_database, "T")
    self.assertFalse(env.get_catalog(current_catalog).table_exists(table_path))

    # Resolve the table through the Java catalog manager instead.
    jvm = get_gateway().jvm
    identifier = jvm.ObjectIdentifier.of(current_catalog, current_database, "T")
    j_table = env._j_tenv.getCatalogManager().getTable(identifier).get().getTable()
    stored_table = CatalogBaseTable(j_table)

    self.assertEqual(int_schema, stored_table.get_unresolved_schema())
    for option_key, expected_value in (("connector", "fake"), ("a", "Test")):
        self.assertEqual(expected_value, stored_table.get_options().get(option_key))
def test_to_string(self):
    """Check ``str()`` of a format descriptor and of a full table descriptor.

    The table descriptor combines a one-column schema, a partition key, an
    option keyed by ``self.option_a``, a nested format descriptor and a
    comment; both string forms are compared with fixed expected text.
    """
    schema = Schema.new_builder().column("f0", DataTypes.STRING()).build()
    format_descriptor = FormatDescriptor \
        .for_format("test-format") \
        .option(self.option_a, False) \
        .build()
    table_descriptor = TableDescriptor.for_connector("test-connector") \
        .schema(schema) \
        .partitioned_by("f0") \
        .option(self.option_a, True) \
        .format(format_descriptor) \
        .comment("Test Comment") \
        .build()
    self.assertEqual("test-format[{a=false}]", str(format_descriptor))
    # NOTE(review): the expected literal below is kept exactly as it appears in
    # the reviewed text; it is triple-quoted, so it may originally have spanned
    # several lines -- confirm its whitespace against the real output of
    # str(table_descriptor) before relying on it.
    self.assertEqual(
        """( `f0` STRING ) COMMENT 'Test Comment' PARTITIONED BY (`f0`) WITH ( 'a' = 'true', 'connector' = 'test-connector', 'test-format.a' = 'false', 'format' = 'test-format' )""",
        str(table_descriptor))
def test_format_basic(self):
    """A format passed as a plain string is stored under the ``format`` option.

    Together with the connector entry the descriptor must hold exactly two
    options.
    """
    descriptor = (
        TableDescriptor.for_connector("test-connector")
        .schema(Schema.new_builder().build())
        .format("json")
        .build()
    )

    self.assertEqual(2, len(descriptor.get_options()))
    expected_options = (
        ("connector", "test-connector"),
        ("format", "json"),
    )
    for option_key, expected_value in expected_options:
        self.assertEqual(expected_value, descriptor.get_options().get(option_key))
def test_statement_set_insert_using_table_descriptor(self):
    """Add an insert to a statement set whose target is a table descriptor.

    A datagen source is registered as temporary table ``T``; the statement
    set inserts it into a blackhole sink descriptor and the test waits on the
    execution result.
    """
    int_schema = Schema.new_builder().column("f0", DataTypes.INT()).build()

    datagen_source = (
        TableDescriptor.for_connector("datagen")
        .schema(int_schema)
        .option("number-of-rows", '10')
        .build()
    )
    blackhole_sink = (
        TableDescriptor.for_connector("blackhole")
        .schema(int_schema)
        .build()
    )

    env = self.t_env
    env.create_temporary_table("T", datagen_source)

    statements = env.create_statement_set()
    source_table = env.from_path("T")
    statements.add_insert(blackhole_sink, source_table)
    statements.execute().wait()
def test_table_from_descriptor(self):
    """Build a table from a descriptor and check its schema and options.

    The resolved schema of the Java table is converted back into a ``Schema``
    and compared with the input; the connector option is read from the
    context-resolved table of the query operation.
    """
    from pyflink.table.schema import Schema

    int_schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
    fake_descriptor = TableDescriptor.for_connector("fake").schema(int_schema).build()
    table = self.t_env.from_descriptor(fake_descriptor)

    # Round-trip the resolved Java schema into a Python Schema for comparison.
    j_resolved_schema = table._j_table.getResolvedSchema()
    j_schema = Schema.new_builder()._j_builder.fromResolvedSchema(j_resolved_schema).build()
    self.assertEqual(int_schema, Schema(j_schema))

    j_operation = table._j_table.getQueryOperation()
    resolved_table = j_operation.getContextResolvedTable()
    j_options = resolved_table.getTable().getOptions()
    self.assertEqual("fake", j_options.get("connector"))
def test_options(self):
    """Options given by fixture keys and by a plain string key are all stored.

    Values are expected back as strings (``False`` -> ``"false"``,
    ``42`` -> ``"42"``), and the connector entry brings the total to four.
    ``self.option_a`` / ``self.option_b`` come from the test fixture; the
    assertions below expect them under the keys ``a`` and ``b``.
    """
    descriptor = (
        TableDescriptor.for_connector("test-connector")
        .schema(Schema.new_builder().build())
        .option(self.option_a, False)
        .option(self.option_b, 42)
        .option("c", "C")
        .build()
    )

    self.assertEqual(4, len(descriptor.get_options()))
    expected_options = (
        ("connector", "test-connector"),
        ("a", "false"),
        ("b", "42"),
        ("c", "C"),
    )
    for option_key, expected_value in expected_options:
        self.assertEqual(expected_value, descriptor.get_options().get(option_key))
def test_table_from_descriptor(self):
    """Build a table from a descriptor and look it up via the catalog manager.

    The resolved schema must match the input schema, and the table fetched
    through the query operation's table identifier must carry the ``fake``
    connector option.
    """
    from pyflink.table.schema import Schema

    int_schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
    fake_descriptor = TableDescriptor.for_connector("fake").schema(int_schema).build()
    table = self.t_env.from_descriptor(fake_descriptor)

    # Round-trip the resolved Java schema into a Python Schema for comparison.
    j_resolved_schema = table._j_table.getResolvedSchema()
    round_tripped = Schema(
        Schema.new_builder()._j_builder.fromResolvedSchema(j_resolved_schema).build())
    self.assertEqual(int_schema, round_tripped)

    # Fetch the registered table by the identifier of the query operation.
    identifier = table._j_table.getQueryOperation().getTableIdentifier()
    j_catalog_manager = self.t_env._j_tenv.getCatalogManager()
    j_catalog_table = j_catalog_manager.getTable(identifier).get().getTable()
    catalog_table = CatalogBaseTable(j_catalog_table)
    self.assertEqual("fake", catalog_table.get_options().get("connector"))
def test_create_table_from_descriptor(self):
    """Create a table from a descriptor and read it back from the catalog.

    The current catalog must report table ``T`` as existing, and the stored
    table must expose the descriptor's schema and options.
    """
    from pyflink.table.schema import Schema

    env = self.t_env
    catalog_name = env.get_current_catalog()
    database_name = env.get_current_database()
    int_schema = Schema.new_builder().column("f0", DataTypes.INT()).build()

    descriptor = (
        TableDescriptor.for_connector("fake")
        .schema(int_schema)
        .option("a", "Test")
        .build()
    )
    env.create_table("T", descriptor)

    table_path = ObjectPath(database_name, "T")
    self.assertTrue(env.get_catalog(catalog_name).table_exists(table_path))

    stored_table = env.get_catalog(catalog_name).get_table(table_path)
    self.assertEqual(int_schema, stored_table.get_unresolved_schema())
    for option_key, expected_value in (("connector", "fake"), ("a", "Test")):
        self.assertEqual(expected_value, stored_table.get_options().get(option_key))
def test_format_with_format_descriptor(self):
    """A format descriptor attached with ``self.key_format`` is stored prefixed.

    The format name is expected under ``key.format`` and each format option
    under ``key.test-format.<name>``; with the connector entry that makes
    five options in total.
    """
    key_format_descriptor = (
        FormatDescriptor.for_format("test-format")
        .option(self.option_a, True)
        .option(self.option_b, 42)
        .option("c", "C")
        .build()
    )
    descriptor = (
        TableDescriptor.for_connector("test-connector")
        .schema(Schema.new_builder().build())
        .format(key_format_descriptor, self.key_format)
        .build()
    )

    self.assertEqual(5, len(descriptor.get_options()))
    expected_options = (
        ("connector", "test-connector"),
        ("key.format", "test-format"),
        ("key.test-format.a", "true"),
        ("key.test-format.b", "42"),
        ("key.test-format.c", "C"),
    )
    for option_key, expected_value in expected_options:
        self.assertEqual(expected_value, descriptor.get_options().get(option_key))
def test_basic(self):
    """Schema, partition keys, connector option and comment are all retained.

    The descriptor is built from a two-column schema with a primary key on
    ``f0``, one partition key and a comment; no extra options are set, so the
    connector entry is the only option expected.
    """
    two_column_schema = (
        Schema.new_builder()
        .column("f0", DataTypes.STRING())
        .column("f1", DataTypes.BIGINT())
        .primary_key("f0")
        .build()
    )
    descriptor = (
        TableDescriptor.for_connector("test-connector")
        .schema(two_column_schema)
        .partitioned_by("f0")
        .comment("Test Comment")
        .build()
    )

    self.assertIsNotNone(descriptor.get_schema())

    # Exactly one partition key, and it is the one passed to partitioned_by.
    self.assertEqual(1, len(descriptor.get_partition_keys()))
    self.assertEqual("f0", descriptor.get_partition_keys()[0])

    # Only the connector entry is expected among the options.
    self.assertEqual(1, len(descriptor.get_options()))
    self.assertEqual("test-connector", descriptor.get_options().get("connector"))

    self.assertEqual("Test Comment", descriptor.get_comment())
import os
import shutil

# Start from a clean slate: remove whatever already exists at the sink path,
# whether it is a single file or a directory.
sink_path = tempfile.gettempdir() + '/batch.csv'
if os.path.isfile(sink_path):
    os.remove(sink_path)
elif os.path.exists(sink_path):
    shutil.rmtree(sink_path)

s_env.set_parallelism(1)
t = st_env.from_elements(
    [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
    ['a', 'b', 'c'])

# Describe the CSV filesystem sink step by step, then register it.
sink_schema = (
    Schema.new_builder()
    .column("a", DataTypes.BIGINT())
    .column("b", DataTypes.STRING())
    .column("c", DataTypes.STRING())
    .build()
)
csv_format = (
    FormatDescriptor.for_format("csv")
    .option("field-delimiter", ",")
    .build()
)
sink_descriptor = (
    TableDescriptor.for_connector("filesystem")
    .schema(sink_schema)
    .option("path", sink_path)
    .format(csv_format)
    .build()
)
st_env.create_temporary_table("csv_sink", sink_descriptor)

# wait() blocks on the insert before the output is read back below.
t.select(t.a + lit(1), t.b, t.c).execute_insert("csv_sink").wait()

# The sink path is listed as a directory; only its first entry is checked.
first_output = os.path.join(sink_path, os.listdir(sink_path)[0])
with open(first_output, 'r') as f:
    lines = f.read()
assert lines == '2,hi,hello\n' + '3,hi,hello\n'

print('pip_test_code.py success!')
def test_no_schema(self):
    """A descriptor built without a schema returns ``None`` from ``get_schema``."""
    builder = TableDescriptor.for_connector("test-connector")
    schemaless_descriptor = builder.build()
    self.assertIsNone(schemaless_descriptor.get_schema())