示例#1
0
 def test_execute_insert_to_table_descriptor(self):
     """Insert a descriptor-defined source table into a descriptor-defined sink.

     A ``datagen`` source limited to 10 rows is read through
     ``from_descriptor`` and written with ``execute_insert`` into a
     ``blackhole`` sink; both descriptors share a single-column schema
     (``f0`` STRING).
     """
     schema = Schema.new_builder() \
         .column("f0", DataTypes.STRING()) \
         .build()

     source_descriptor = TableDescriptor.for_connector("datagen") \
         .option("number-of-rows", '10') \
         .schema(schema) \
         .build()
     table = self.t_env.from_descriptor(source_descriptor)

     sink_descriptor = TableDescriptor.for_connector("blackhole") \
         .schema(schema) \
         .build()
     table_result = table.execute_insert(sink_descriptor)

     # collect() hands back a closeable iterator; close it deterministically
     # instead of leaving the underlying handle to garbage collection.
     with table_result.collect():
         pass
示例#2
0
    def test_stream_case(self):
        """Run the streaming shell example and check the CSV output it produces.

        Two rows are written through a ``filesystem`` sink in CSV format after
        incrementing column ``a`` by one; the first file found in the sink
        directory is then compared against the expected text.
        """
        from pyflink.shell import s_env, st_env, DataTypes
        from pyflink.table.schema import Schema
        from pyflink.table.table_descriptor import TableDescriptor, FormatDescriptor
        # example begin

        import tempfile
        import os
        import shutil
        sink_path = tempfile.gettempdir() + '/streaming.csv'
        # Remove leftovers of an earlier run, whether a plain file or a directory.
        if os.path.isfile(sink_path):
            os.remove(sink_path)
        elif os.path.exists(sink_path):
            shutil.rmtree(sink_path)
        s_env.set_parallelism(1)
        elements = st_env.from_elements(
            [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
            ['a', 'b', 'c'])

        sink_schema = Schema.new_builder() \
            .column("a", DataTypes.BIGINT()) \
            .column("b", DataTypes.STRING()) \
            .column("c", DataTypes.STRING()) \
            .build()
        csv_format = FormatDescriptor.for_format("csv") \
            .option("field-delimiter", ",") \
            .build()
        sink_descriptor = TableDescriptor.for_connector("filesystem") \
            .schema(sink_schema) \
            .option("path", sink_path) \
            .format(csv_format) \
            .build()
        st_env.create_temporary_table("stream_sink", sink_descriptor)

        projected = elements.select(elements.a + 1, elements.b, elements.c)
        projected.execute_insert("stream_sink").wait()

        # verification code, do not copy this part to shell.py
        first_output_file = os.path.join(sink_path, os.listdir(sink_path)[0])
        with open(first_output_file, 'r') as output:
            content = output.read()
            self.assertEqual(content, '2,hi,hello\n' + '3,hi,hello\n')
示例#3
0
    def test_create_temporary_table_from_descriptor(self):
        """Register a temporary table from a descriptor and inspect how it is stored.

        The test expects the current catalog not to report table ``T``, while the
        catalog manager must resolve it with the descriptor's schema and options.
        """
        from pyflink.table.schema import Schema

        t_env = self.t_env
        catalog = t_env.get_current_catalog()
        database = t_env.get_current_database()
        schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
        descriptor = TableDescriptor.for_connector("fake") \
            .schema(schema) \
            .option("a", "Test") \
            .build()
        t_env.create_temporary_table("T", descriptor)

        object_path = ObjectPath(database, "T")
        self.assertFalse(t_env.get_catalog(catalog).table_exists(object_path))
        gateway = get_gateway()

        # Look the table up through the Java catalog manager by its full identifier.
        j_identifier = gateway.jvm.ObjectIdentifier.of(catalog, database, "T")
        j_table = t_env._j_tenv.getCatalogManager().getTable(j_identifier).get().getTable()
        catalog_table = CatalogBaseTable(j_table)

        self.assertEqual(schema, catalog_table.get_unresolved_schema())
        self.assertEqual("fake", catalog_table.get_options().get("connector"))
        self.assertEqual("Test", catalog_table.get_options().get("a"))
示例#4
0
    def test_to_string(self):
        schema = Schema.new_builder().column("f0", DataTypes.STRING()).build()
        format_descriptor = FormatDescriptor \
            .for_format("test-format") \
            .option(self.option_a, False) \
            .build()
        table_descriptor = TableDescriptor.for_connector("test-connector") \
            .schema(schema) \
            .partitioned_by("f0") \
            .option(self.option_a, True) \
            .format(format_descriptor) \
            .comment("Test Comment") \
            .build()
        self.assertEqual("test-format[{a=false}]", str(format_descriptor))
        self.assertEqual(
            """(
  `f0` STRING
)
COMMENT 'Test Comment'
PARTITIONED BY (`f0`)
WITH (
  'a' = 'true',
  'connector' = 'test-connector',
  'test-format.a' = 'false',
  'format' = 'test-format'
)""", str(table_descriptor))
示例#5
0
 def test_format_basic(self):
     """Check that a format given by name ends up as the ``format`` option."""
     descriptor = (TableDescriptor.for_connector("test-connector")
                   .schema(Schema.new_builder().build())
                   .format("json")
                   .build())

     # Only the connector and the format entries are expected.
     self.assertEqual(2, len(descriptor.get_options()))
     connector_name = descriptor.get_options().get("connector")
     self.assertEqual("test-connector", connector_name)
     format_name = descriptor.get_options().get("format")
     self.assertEqual("json", format_name)
示例#6
0
    def test_statement_set_insert_using_table_descriptor(self):
        """Add an insert into a descriptor-defined sink to a statement set and run it.

        Temporary table ``T`` (``datagen``, 10 rows) is the source and a
        ``blackhole`` descriptor with the same schema is the sink.
        """
        schema = Schema.new_builder().column("f0", DataTypes.INT()).build()

        datagen_source = (TableDescriptor.for_connector("datagen")
                          .schema(schema)
                          .option("number-of-rows", '10')
                          .build())
        blackhole_sink = (TableDescriptor.for_connector("blackhole")
                          .schema(schema)
                          .build())

        self.t_env.create_temporary_table("T", datagen_source)

        statement_set = self.t_env.create_statement_set()
        statement_set.add_insert(blackhole_sink, self.t_env.from_path("T"))

        # Block until the submitted job has finished.
        statement_set.execute().wait()
示例#7
0
    def test_table_from_descriptor(self):
        """Create a table from a descriptor and verify its schema and connector option.

        The connector option is read from the context-resolved table of the
        underlying Java query operation.
        """
        from pyflink.table.schema import Schema

        schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
        descriptor = TableDescriptor.for_connector("fake").schema(schema).build()

        table = self.t_env.from_descriptor(descriptor)

        # Rebuild an unresolved schema from the table's resolved schema for comparison.
        j_schema_builder = Schema.new_builder()._j_builder
        j_resolved_schema = table._j_table.getResolvedSchema()
        rebuilt_schema = Schema(j_schema_builder.fromResolvedSchema(j_resolved_schema).build())
        self.assertEqual(schema, rebuilt_schema)

        j_context_resolved_table = table._j_table.getQueryOperation().getContextResolvedTable()
        j_options = j_context_resolved_table.getTable().getOptions()
        self.assertEqual("fake", j_options.get("connector"))
示例#8
0
 def test_options(self):
     """Check that options set on the builder are returned as strings.

     Options are set through ``self.option_a``, ``self.option_b`` and the
     plain key ``"c"``; together with the connector entry four options are
     expected.
     """
     descriptor = (TableDescriptor.for_connector("test-connector")
                   .schema(Schema.new_builder().build())
                   .option(self.option_a, False)
                   .option(self.option_b, 42)
                   .option("c", "C")
                   .build())

     self.assertEqual(4, len(descriptor.get_options()))
     # (expected value, option key) pairs, checked in the original order.
     expectations = (
         ("test-connector", "connector"),
         ("false", "a"),
         ("42", "b"),
         ("C", "c"),
     )
     for expected_value, key in expectations:
         self.assertEqual(expected_value, descriptor.get_options().get(key))
    def test_table_from_descriptor(self):
        """Create a table from a descriptor and verify its schema and connector option.

        The connector option is read from the catalog table that the catalog
        manager returns for the query operation's table identifier.
        """
        from pyflink.table.schema import Schema

        schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
        descriptor = TableDescriptor.for_connector("fake").schema(schema).build()

        table = self.t_env.from_descriptor(descriptor)

        # Rebuild an unresolved schema from the table's resolved schema for comparison.
        j_schema_builder = Schema.new_builder()._j_builder
        j_resolved_schema = table._j_table.getResolvedSchema()
        rebuilt_schema = Schema(j_schema_builder.fromResolvedSchema(j_resolved_schema).build())
        self.assertEqual(schema, rebuilt_schema)

        j_catalog_manager = self.t_env._j_tenv.getCatalogManager()
        j_identifier = table._j_table.getQueryOperation().getTableIdentifier()
        table = CatalogBaseTable(j_catalog_manager.getTable(j_identifier).get().getTable())
        self.assertEqual("fake", table.get_options().get("connector"))
示例#10
0
    def test_create_table_from_descriptor(self):
        """Create table ``T`` from a descriptor and read it back from the catalog.

        The catalog must report the table and return it with the descriptor's
        schema and options.
        """
        from pyflink.table.schema import Schema

        t_env = self.t_env
        catalog = t_env.get_current_catalog()
        database = t_env.get_current_database()
        schema = Schema.new_builder().column("f0", DataTypes.INT()).build()
        descriptor = (TableDescriptor.for_connector("fake")
                      .schema(schema)
                      .option("a", "Test")
                      .build())
        t_env.create_table("T", descriptor)

        object_path = ObjectPath(database, "T")
        self.assertTrue(t_env.get_catalog(catalog).table_exists(object_path))

        catalog_table = t_env.get_catalog(catalog).get_table(object_path)
        self.assertEqual(schema, catalog_table.get_unresolved_schema())
        self.assertEqual("fake", catalog_table.get_options().get("connector"))
        self.assertEqual("Test", catalog_table.get_options().get("a"))
示例#11
0
 def test_format_with_format_descriptor(self):
     """Check the options produced by a format descriptor passed with a format option.

     The format descriptor is handed to ``format`` together with
     ``self.key_format``; the assertions expect its name under ``key.format``
     and its own options under the ``key.test-format.`` prefix.
     """
     test_format = (FormatDescriptor.for_format("test-format")
                    .option(self.option_a, True)
                    .option(self.option_b, 42)
                    .option("c", "C")
                    .build())
     descriptor = (TableDescriptor.for_connector("test-connector")
                   .schema(Schema.new_builder().build())
                   .format(test_format, self.key_format)
                   .build())

     self.assertEqual(5, len(descriptor.get_options()))
     # (expected value, option key) pairs, checked in the original order.
     expectations = (
         ("test-connector", "connector"),
         ("test-format", "key.format"),
         ("true", "key.test-format.a"),
         ("42", "key.test-format.b"),
         ("C", "key.test-format.c"),
     )
     for expected_value, key in expectations:
         self.assertEqual(expected_value, descriptor.get_options().get(key))
示例#12
0
    def test_basic(self):
        """Build a descriptor with schema, partition key and comment, then read them back."""
        schema = (Schema.new_builder()
                  .column("f0", DataTypes.STRING())
                  .column("f1", DataTypes.BIGINT())
                  .primary_key("f0")
                  .build())

        descriptor = (TableDescriptor.for_connector("test-connector")
                      .schema(schema)
                      .partitioned_by("f0")
                      .comment("Test Comment")
                      .build())

        self.assertIsNotNone(descriptor.get_schema())

        # Exactly one partition key, the column passed to partitioned_by().
        self.assertEqual(1, len(descriptor.get_partition_keys()))
        first_partition_key = descriptor.get_partition_keys()[0]
        self.assertEqual("f0", first_partition_key)

        # The connector name is the only option.
        self.assertEqual(1, len(descriptor.get_options()))
        connector_name = descriptor.get_options().get("connector")
        self.assertEqual("test-connector", connector_name)

        self.assertEqual("Test Comment", descriptor.get_comment())
示例#13
0
import os
import shutil

# Location of the filesystem sink; leftovers of an earlier run are removed first,
# whether they are a plain file or a directory.
sink_path = tempfile.gettempdir() + '/batch.csv'
if os.path.isfile(sink_path):
    os.remove(sink_path)
elif os.path.exists(sink_path):
    shutil.rmtree(sink_path)
s_env.set_parallelism(1)
t = st_env.from_elements(
    [(1, 'hi', 'hello'), (2, 'hi', 'hello')],
    ['a', 'b', 'c'])

# Describe the CSV sink step by step instead of in one nested builder chain.
sink_schema = Schema.new_builder() \
    .column("a", DataTypes.BIGINT()) \
    .column("b", DataTypes.STRING()) \
    .column("c", DataTypes.STRING()) \
    .build()
csv_format = FormatDescriptor.for_format("csv") \
    .option("field-delimiter", ",") \
    .build()
sink_descriptor = TableDescriptor.for_connector("filesystem") \
    .schema(sink_schema) \
    .option("path", sink_path) \
    .format(csv_format) \
    .build()
st_env.create_temporary_table("csv_sink", sink_descriptor)

# Increment column a by one, write to the sink and wait for the job to finish.
t.select(t.a + lit(1), t.b, t.c).execute_insert("csv_sink").wait()

# Compare the first file found in the sink directory with the expected CSV text.
first_output_file = os.path.join(sink_path, os.listdir(sink_path)[0])
with open(first_output_file, 'r') as f:
    lines = f.read()
    assert lines == '2,hi,hello\n' + '3,hi,hello\n'

print('pip_test_code.py success!')
示例#14
0
 def test_no_schema(self):
     """Check that a descriptor built without a schema reports ``None`` for it."""
     builder = TableDescriptor.for_connector("test-connector")
     descriptor = builder.build()
     self.assertIsNone(descriptor.get_schema())