Exemplo n.º 1
0
    def test_from_element(self):
        """Round-trip one row covering many data types through an append sink.

        A single 19-column row (numeric, string, temporal, interval, array,
        decimal, row, map, bytes and two user-defined types) is turned into a
        table with an explicit schema, inserted into the "Results" sink, and
        the sink output is compared with its expected textual form.
        """
        column_names = [
            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j",
            "k", "l", "m", "n", "o", "p", "q", "r", "s",
        ]
        column_types = [
            DataTypes.BIGINT(),
            DataTypes.DOUBLE(),
            DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(),
            DataTypes.TIMESTAMP_WITH_LOCAL_TIME_ZONE(),
            DataTypes.INTERVAL(DataTypes.DAY(), DataTypes.SECOND()),
            DataTypes.ARRAY(DataTypes.DOUBLE()),
            DataTypes.ARRAY(DataTypes.DOUBLE(False)),
            DataTypes.ARRAY(DataTypes.STRING()),
            DataTypes.ARRAY(DataTypes.DATE()),
            DataTypes.DECIMAL(10, 0),
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.BIGINT()),
                DataTypes.FIELD("b", DataTypes.DOUBLE()),
            ]),
            DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
            DataTypes.BYTES(),
            ExamplePointUDT(),
            PythonOnlyUDT(),
        ]
        # Pair every column name with its type to form the row schema.
        row_schema = DataTypes.ROW([
            DataTypes.FIELD(name, data_type)
            for name, data_type in zip(column_names, column_types)
        ])

        # One value per column, in the same order as column_names.
        input_row = (
            1,
            1.0,
            "hi",
            "hello",
            datetime.date(1970, 1, 2),
            datetime.time(1, 0, 0),
            datetime.datetime(1970, 1, 2, 0, 0),
            datetime.datetime(1970, 1, 2, 0, 0),
            datetime.timedelta(days=1, microseconds=10),
            [1.0, None],
            array.array("d", [1.0, 2.0]),
            ["abc"],
            [datetime.date(1970, 1, 2)],
            Decimal(1),
            Row("a", "b")(1, 2.0),
            {"key": 1.0},
            bytearray(b'ABCD'),
            ExamplePoint(1.0, 2.0),
            PythonOnlyPoint(3.0, 4.0),
        )

        table_env = self.t_env
        results_sink = source_sink_utils.TestAppendSink(
            column_names, column_types)
        table_env.register_table_sink("Results", results_sink)
        table_env.from_elements([input_row], row_schema).insert_into("Results")
        self.env.execute()

        expected = [
            '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,'
            '1970-01-02 00:00:00.0,86400000010,[1.0, null],[1.0, 2.0],[abc],[1970-01-02],'
            '1,1,2.0,{key=1.0},[65, 66, 67, 68],[1.0, 2.0],[3.0, 4.0]'
        ]
        self.assert_equals(source_sink_utils.results(), expected)
Exemplo n.º 2
0
    def test_blink_from_element(self):
        """Round-trip one multi-type row through a blink-planner batch env.

        Creates a dedicated batch table environment using the blink planner,
        builds a 17-column table from a single row with an explicit schema,
        writes it to the "Results" append sink and checks the sink output.
        """
        settings = EnvironmentSettings.new_instance() \
            .use_blink_planner().in_batch_mode().build()
        blink_env = BatchTableEnvironment.create(environment_settings=settings)

        column_names = [
            "a", "b", "c", "d", "e", "f", "g", "h", "i",
            "j", "k", "l", "m", "n", "o", "p", "q",
        ]
        column_types = [
            DataTypes.BIGINT(),
            DataTypes.DOUBLE(),
            DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
            DataTypes.INTERVAL(DataTypes.SECOND(3)),
            DataTypes.ARRAY(DataTypes.DOUBLE()),
            DataTypes.ARRAY(DataTypes.DOUBLE(False)),
            DataTypes.ARRAY(DataTypes.STRING()),
            DataTypes.ARRAY(DataTypes.DATE()),
            DataTypes.DECIMAL(38, 18),
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.BIGINT()),
                DataTypes.FIELD("b", DataTypes.DOUBLE()),
            ]),
            DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
            DataTypes.BYTES(),
            PythonOnlyUDT(),
        ]
        # Pair every column name with its type to form the row schema.
        row_schema = DataTypes.ROW([
            DataTypes.FIELD(name, data_type)
            for name, data_type in zip(column_names, column_types)
        ])

        # One value per column, in the same order as column_names.
        input_row = (
            1,
            1.0,
            "hi",
            "hello",
            datetime.date(1970, 1, 2),
            datetime.time(1, 0, 0),
            datetime.datetime(1970, 1, 2, 0, 0),
            datetime.timedelta(days=1, microseconds=10),
            [1.0, None],
            array.array("d", [1.0, 2.0]),
            ["abc"],
            [datetime.date(1970, 1, 2)],
            Decimal(1),
            Row("a", "b")(1, 2.0),
            {"key": 1.0},
            bytearray(b'ABCD'),
            PythonOnlyPoint(3.0, 4.0),
        )

        results_sink = source_sink_utils.TestAppendSink(
            column_names, column_types)
        blink_env.register_table_sink("Results", results_sink)
        blink_env.from_elements([input_row], row_schema).insert_into("Results")
        blink_env.execute("test")

        expected = [
            '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,'
            '86400000,[1.0, null],[1.0, 2.0],[abc],[1970-01-02],'
            '1.000000000000000000,1,2.0,{key=1.0},[65, 66, 67, 68],[3.0, 4.0]'
        ]
        self.assert_equals(source_sink_utils.results(), expected)
Exemplo n.º 3
0
 def test_collect_for_all_data_types(self):
     """Collect a one-row table covering many data types and compare rows.

     The table is built from a single tuple with an explicit 17-field schema,
     executed, and every row yielded by ``collect()`` is gathered into a list
     that must equal the expected rows. Note that the expected TIME value
     carries no microseconds although the input value does.
     """
     expected_rows = [
         Row(
             1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
             bytearray(b'pyflink'),
             'pyflink',
             datetime.date(2014, 9, 13),
             datetime.time(12, 0),
             datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
             [Row(['[pyflink]']), Row(['[pyflink]']), Row(['[pyflink]'])],
             {1: Row(['[flink]']), 2: Row(['[pyflink]'])},
             decimal.Decimal('1000000000000000000.05'),
             decimal.Decimal(
                 '1000000000000000000.05999999999999999899999999999'),
         )
     ]

     # One value per schema field, in declaration order.
     input_row = (
         1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
         bytearray(b'pyflink'),
         'pyflink',
         datetime.date(2014, 9, 13),
         datetime.time(hour=12, minute=0, second=0, microsecond=123000),
         datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
         [Row(['pyflink']), Row(['pyflink']), Row(['pyflink'])],
         {1: Row(['flink']), 2: Row(['pyflink'])},
         decimal.Decimal('1000000000000000000.05'),
         decimal.Decimal(
             '1000000000000000000.0599999999999999989'
             '9999999999'),
     )
     row_schema = DataTypes.ROW([
         DataTypes.FIELD("a", DataTypes.BIGINT()),
         DataTypes.FIELD("b", DataTypes.BIGINT()),
         DataTypes.FIELD("c", DataTypes.TINYINT()),
         DataTypes.FIELD("d", DataTypes.BOOLEAN()),
         DataTypes.FIELD("e", DataTypes.SMALLINT()),
         DataTypes.FIELD("f", DataTypes.INT()),
         DataTypes.FIELD("g", DataTypes.FLOAT()),
         DataTypes.FIELD("h", DataTypes.DOUBLE()),
         DataTypes.FIELD("i", DataTypes.BYTES()),
         DataTypes.FIELD("j", DataTypes.STRING()),
         DataTypes.FIELD("k", DataTypes.DATE()),
         DataTypes.FIELD("l", DataTypes.TIME()),
         DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)),
         DataTypes.FIELD(
             "n",
             DataTypes.ARRAY(
                 DataTypes.ROW([DataTypes.FIELD('ss2', DataTypes.STRING())]))),
         DataTypes.FIELD(
             "o",
             DataTypes.MAP(
                 DataTypes.BIGINT(),
                 DataTypes.ROW([DataTypes.FIELD('ss', DataTypes.STRING())]))),
         DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)),
         DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18)),
     ])

     execution = self.t_env.from_elements([input_row], row_schema).execute()
     # The collect iterator is used as a context manager, as in the original.
     with execution.collect() as row_iterator:
         collected_rows = list(row_iterator)
         self.assertEqual(expected_rows, collected_rows)
Exemplo n.º 4
0
    def test_from_element(self):
        """Round-trip one multi-type row through ``from_elements`` and a sink.

        The table is created from a single tuple without an explicit schema,
        inserted into the "Results" append sink (17 columns, including two
        user-defined types), and the sink output is compared with the expected
        textual rendering.
        """
        t_env = self.t_env
        # Build the signed-byte array directly from a bytes initializer.
        # ``array.fromstring`` was deprecated in Python 3.2 and removed in 3.9;
        # the constructor form yields the same [65, 66, 67, 68] contents.
        a = array.array('b', b'ABCD')
        t = t_env.from_elements([
            (1, 1.0, "hi", "hello", datetime.date(1970, 1, 2),
             datetime.time(1, 0, 0), datetime.datetime(1970, 1, 2, 0,
                                                       0), [1.0, None],
             array.array("d",
                         [1.0, 2.0]), ["abc"], [datetime.date(1970, 1, 2)],
             Decimal(1), Row("a", "b")(1, 2.0), {
                 "key": 1.0
             }, a, ExamplePoint(1.0, 2.0), PythonOnlyPoint(3.0, 4.0))
        ])
        field_names = [
            "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
            "n", "o", "p", "q"
        ]
        # Sink column types, positionally matching the tuple above.
        field_types = [
            DataTypes.BIGINT(),
            DataTypes.DOUBLE(),
            DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(),
            DataTypes.ARRAY(DataTypes.DOUBLE()),
            DataTypes.ARRAY(DataTypes.DOUBLE(False)),
            DataTypes.ARRAY(DataTypes.STRING()),
            DataTypes.ARRAY(DataTypes.DATE()),
            DataTypes.DECIMAL(10, 0),
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.BIGINT()),
                DataTypes.FIELD("b", DataTypes.DOUBLE())
            ]),
            DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
            DataTypes.BYTES(),
            ExamplePointUDT(),
            PythonOnlyUDT()
        ]
        table_sink = source_sink_utils.TestAppendSink(field_names, field_types)
        t_env.register_table_sink("Results", table_sink)

        t.insert_into("Results")
        t_env.exec_env().execute()
        actual = source_sink_utils.results()

        expected = [
            '1,1.0,hi,hello,1970-01-02,01:00:00,1970-01-02 00:00:00.0,[1.0, null],'
            '[1.0, 2.0],[abc],[1970-01-02],1,1,2.0,{key=1.0},[65, 66, 67, 68],[1.0, 2.0],'
            '[3.0, 4.0]'
        ]
        self.assert_equals(actual, expected)
Exemplo n.º 5
0
    def test_all_data_types(self):
        """Run one pandas UDF per supported data type and check the sink output.

        Each nested UDF asserts that its argument has the expected pandas
        container type and element type (and, for a few types, the expected
        value) and then returns the argument. A single 21-column row is
        pushed through all UDFs into the "Results" sink and the sink contents
        are compared with the expected rendering.
        """
        import pandas as pd
        import numpy as np

        # --- Scalar-type UDFs: each receives a pd.Series and returns it. ---

        @udf(result_type=DataTypes.TINYINT(), func_type="pandas")
        def tinyint_func(tinyint_param):
            assert isinstance(tinyint_param, pd.Series)
            assert isinstance(tinyint_param[0], np.int8), \
                'tinyint_param of wrong type %s !' % type(tinyint_param[0])
            return tinyint_param

        @udf(result_type=DataTypes.SMALLINT(), func_type="pandas")
        def smallint_func(smallint_param):
            assert isinstance(smallint_param, pd.Series)
            assert isinstance(smallint_param[0], np.int16), \
                'smallint_param of wrong type %s !' % type(smallint_param[0])
            assert smallint_param[
                0] == 32767, 'smallint_param of wrong value %s' % smallint_param
            return smallint_param

        @udf(result_type=DataTypes.INT(), func_type="pandas")
        def int_func(int_param):
            assert isinstance(int_param, pd.Series)
            assert isinstance(int_param[0], np.int32), \
                'int_param of wrong type %s !' % type(int_param[0])
            assert int_param[
                0] == -2147483648, 'int_param of wrong value %s' % int_param
            return int_param

        @udf(result_type=DataTypes.BIGINT(), func_type="pandas")
        def bigint_func(bigint_param):
            assert isinstance(bigint_param, pd.Series)
            assert isinstance(bigint_param[0], np.int64), \
                'bigint_param of wrong type %s !' % type(bigint_param[0])
            return bigint_param

        @udf(result_type=DataTypes.BOOLEAN(), func_type="pandas")
        def boolean_func(boolean_param):
            assert isinstance(boolean_param, pd.Series)
            assert isinstance(boolean_param[0], np.bool_), \
                'boolean_param of wrong type %s !' % type(boolean_param[0])
            return boolean_param

        @udf(result_type=DataTypes.FLOAT(), func_type="pandas")
        def float_func(float_param):
            assert isinstance(float_param, pd.Series)
            assert isinstance(float_param[0], np.float32), \
                'float_param of wrong type %s !' % type(float_param[0])
            return float_param

        @udf(result_type=DataTypes.DOUBLE(), func_type="pandas")
        def double_func(double_param):
            assert isinstance(double_param, pd.Series)
            assert isinstance(double_param[0], np.float64), \
                'double_param of wrong type %s !' % type(double_param[0])
            return double_param

        @udf(result_type=DataTypes.STRING(), func_type="pandas")
        def varchar_func(varchar_param):
            assert isinstance(varchar_param, pd.Series)
            assert isinstance(varchar_param[0], str), \
                'varchar_param of wrong type %s !' % type(varchar_param[0])
            return varchar_param

        @udf(result_type=DataTypes.BYTES(), func_type="pandas")
        def varbinary_func(varbinary_param):
            assert isinstance(varbinary_param, pd.Series)
            assert isinstance(varbinary_param[0], bytes), \
                'varbinary_param of wrong type %s !' % type(varbinary_param[0])
            return varbinary_param

        @udf(result_type=DataTypes.DECIMAL(38, 18), func_type="pandas")
        def decimal_func(decimal_param):
            assert isinstance(decimal_param, pd.Series)
            assert isinstance(decimal_param[0], decimal.Decimal), \
                'decimal_param of wrong type %s !' % type(decimal_param[0])
            return decimal_param

        @udf(result_type=DataTypes.DATE(), func_type="pandas")
        def date_func(date_param):
            assert isinstance(date_param, pd.Series)
            assert isinstance(date_param[0], datetime.date), \
                'date_param of wrong type %s !' % type(date_param[0])
            return date_param

        @udf(result_type=DataTypes.TIME(), func_type="pandas")
        def time_func(time_param):
            assert isinstance(time_param, pd.Series)
            assert isinstance(time_param[0], datetime.time), \
                'time_param of wrong type %s !' % type(time_param[0])
            return time_param

        # Shared by the input row and by timestamp_func's value assertion.
        timestamp_value = datetime.datetime(1970, 1, 2, 0, 0, 0, 123000)

        @udf(result_type=DataTypes.TIMESTAMP(3), func_type="pandas")
        def timestamp_func(timestamp_param):
            assert isinstance(timestamp_param, pd.Series)
            assert isinstance(timestamp_param[0], datetime.datetime), \
                'timestamp_param of wrong type %s !' % type(timestamp_param[0])
            assert timestamp_param[0] == timestamp_value, \
                'timestamp_param is wrong value %s, should be %s!' % (timestamp_param[0],
                                                                      timestamp_value)
            return timestamp_param

        # --- Array UDFs: one plain function wrapped with three result types. ---

        def array_func(array_param):
            assert isinstance(array_param, pd.Series)
            assert isinstance(array_param[0], np.ndarray), \
                'array_param of wrong type %s !' % type(array_param[0])
            return array_param

        array_str_func = udf(array_func,
                             result_type=DataTypes.ARRAY(DataTypes.STRING()),
                             func_type="pandas")

        array_timestamp_func = udf(array_func,
                                   result_type=DataTypes.ARRAY(
                                       DataTypes.TIMESTAMP(3)),
                                   func_type="pandas")

        array_int_func = udf(array_func,
                             result_type=DataTypes.ARRAY(DataTypes.INT()),
                             func_type="pandas")

        @udf(result_type=DataTypes.ARRAY(DataTypes.STRING()),
             func_type="pandas")
        def nested_array_func(nested_array_param):
            assert isinstance(nested_array_param, pd.Series)
            assert isinstance(nested_array_param[0], np.ndarray), \
                'nested_array_param of wrong type %s !' % type(nested_array_param[0])
            # Returns a Series built from the first element only, which turns
            # the nested array input into the declared ARRAY<STRING> result.
            return pd.Series(nested_array_param[0])

        # Row type used for the row UDF, the sink's last column and field "u".
        row_type = DataTypes.ROW([
            DataTypes.FIELD("f1", DataTypes.INT()),
            DataTypes.FIELD("f2", DataTypes.STRING()),
            DataTypes.FIELD("f3", DataTypes.TIMESTAMP(3)),
            DataTypes.FIELD("f4", DataTypes.ARRAY(DataTypes.INT()))
        ])

        @udf(result_type=row_type, func_type="pandas")
        def row_func(row_param):
            # A row argument is asserted to be a DataFrame, one column per field.
            assert isinstance(row_param, pd.DataFrame)
            assert isinstance(row_param.f1, pd.Series)
            assert isinstance(row_param.f1[0], np.int32), \
                'row_param.f1 of wrong type %s !' % type(row_param.f1[0])
            assert isinstance(row_param.f2, pd.Series)
            assert isinstance(row_param.f2[0], str), \
                'row_param.f2 of wrong type %s !' % type(row_param.f2[0])
            assert isinstance(row_param.f3, pd.Series)
            assert isinstance(row_param.f3[0], datetime.datetime), \
                'row_param.f3 of wrong type %s !' % type(row_param.f3[0])
            assert isinstance(row_param.f4, pd.Series)
            assert isinstance(row_param.f4[0], np.ndarray), \
                'row_param.f4 of wrong type %s !' % type(row_param.f4[0])
            return row_param

        # Sink columns follow the order of the select() call below; column
        # "t" is ARRAY<STRING> because nested_array_func flattens its input.
        table_sink = source_sink_utils.TestAppendSink([
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u'
        ], [
            DataTypes.TINYINT(),
            DataTypes.SMALLINT(),
            DataTypes.INT(),
            DataTypes.BIGINT(),
            DataTypes.BOOLEAN(),
            DataTypes.BOOLEAN(),
            DataTypes.FLOAT(),
            DataTypes.DOUBLE(),
            DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.BYTES(),
            DataTypes.DECIMAL(38, 18),
            DataTypes.DECIMAL(38, 18),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
            DataTypes.ARRAY(DataTypes.STRING()),
            DataTypes.ARRAY(DataTypes.TIMESTAMP(3)),
            DataTypes.ARRAY(DataTypes.INT()),
            DataTypes.ARRAY(DataTypes.STRING()), row_type
        ])
        self.t_env.register_table_sink("Results", table_sink)

        # Single input row; values line up with the schema fields a..u.
        t = self.t_env.from_elements(
            [(1, 32767, -2147483648, 1, True, False, 1.0, 1.0, 'hello', '中文',
              bytearray(b'flink'), decimal.Decimal('1000000000000000000.05'),
              decimal.Decimal(
                  '1000000000000000000.05999999999999999899999999999'),
              datetime.date(2014, 9, 13),
              datetime.time(hour=1, minute=0, second=1), timestamp_value,
              ['hello', '中文', None], [timestamp_value], [1, 2], [[
                  'hello', '中文', None
              ]], Row(1, 'hello', timestamp_value, [1, 2]))],
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.TINYINT()),
                DataTypes.FIELD("b", DataTypes.SMALLINT()),
                DataTypes.FIELD("c", DataTypes.INT()),
                DataTypes.FIELD("d", DataTypes.BIGINT()),
                DataTypes.FIELD("e", DataTypes.BOOLEAN()),
                DataTypes.FIELD("f", DataTypes.BOOLEAN()),
                DataTypes.FIELD("g", DataTypes.FLOAT()),
                DataTypes.FIELD("h", DataTypes.DOUBLE()),
                DataTypes.FIELD("i", DataTypes.STRING()),
                DataTypes.FIELD("j", DataTypes.STRING()),
                DataTypes.FIELD("k", DataTypes.BYTES()),
                DataTypes.FIELD("l", DataTypes.DECIMAL(38, 18)),
                DataTypes.FIELD("m", DataTypes.DECIMAL(38, 18)),
                DataTypes.FIELD("n", DataTypes.DATE()),
                DataTypes.FIELD("o", DataTypes.TIME()),
                DataTypes.FIELD("p", DataTypes.TIMESTAMP(3)),
                DataTypes.FIELD("q", DataTypes.ARRAY(DataTypes.STRING())),
                DataTypes.FIELD("r", DataTypes.ARRAY(DataTypes.TIMESTAMP(3))),
                DataTypes.FIELD("s", DataTypes.ARRAY(DataTypes.INT())),
                DataTypes.FIELD(
                    "t", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.STRING()))),
                DataTypes.FIELD("u", row_type)
            ]))

        # Apply one UDF per column, write to the sink and block until done.
        t.select(
            tinyint_func(t.a),
            smallint_func(t.b),
            int_func(t.c),
            bigint_func(t.d),
            boolean_func(t.e),
            boolean_func(t.f),
            float_func(t.g),
            double_func(t.h),
            varchar_func(t.i),
            varchar_func(t.j),
            varbinary_func(t.k),
            decimal_func(t.l),
            decimal_func(t.m),
            date_func(t.n),
            time_func(t.o),
            timestamp_func(t.p),
            array_str_func(t.q),
            array_timestamp_func(t.r),
            array_int_func(t.s),
            nested_array_func(t.t),
            row_func(t.u)) \
            .execute_insert("Results").wait()
        actual = source_sink_utils.results()
        # The second decimal is expected with 18 fractional digits, i.e.
        # shorter than the 29-digit literal supplied in the input row.
        self.assert_equals(actual, [
            "+I[1, 32767, -2147483648, 1, true, false, 1.0, 1.0, hello, 中文, "
            "[102, 108, 105, 110, 107], 1000000000000000000.050000000000000000, "
            "1000000000000000000.059999999999999999, 2014-09-13, 01:00:01, "
            "1970-01-02 00:00:00.123, [hello, 中文, null], [1970-01-02 00:00:00.123], "
            "[1, 2], [hello, 中文, null], +I[1, hello, 1970-01-02 00:00:00.123, [1, 2]]]"
        ])
Exemplo n.º 6
0
    def test_all_data_types(self):
        """Run one general Python UDF per data type and check the sink output.

        Each nested function asserts the Python type and/or value of its
        argument and returns it (``array_func`` returns the first inner list).
        The functions are registered under their own names, applied to a
        single 17-column row via a string select expression, and the rows
        written to the "Results" sink are compared with the expected output.
        """
        # --- UDF bodies: type/value assertions followed by a pass-through. ---

        def boolean_func(bool_param):
            assert isinstance(bool_param, bool), 'bool_param of wrong type %s !' \
                                                 % type(bool_param)
            return bool_param

        def tinyint_func(tinyint_param):
            assert isinstance(tinyint_param, int), 'tinyint_param of wrong type %s !' \
                                                   % type(tinyint_param)
            return tinyint_param

        def smallint_func(smallint_param):
            assert isinstance(smallint_param, int), 'smallint_param of wrong type %s !' \
                                                    % type(smallint_param)
            assert smallint_param == 32767, 'smallint_param of wrong value %s' % smallint_param
            return smallint_param

        def int_func(int_param):
            assert isinstance(int_param, int), 'int_param of wrong type %s !' \
                                               % type(int_param)
            assert int_param == -2147483648, 'int_param of wrong value %s' % int_param
            return int_param

        def bigint_func(bigint_param):
            assert isinstance(bigint_param, int), 'bigint_param of wrong type %s !' \
                                                  % type(bigint_param)
            return bigint_param

        # Applied to column "b", whose input value is None.
        def bigint_func_none(bigint_param):
            assert bigint_param is None, 'bigint_param %s should be None!' % bigint_param
            return bigint_param

        # float_equal is defined outside this block; presumably an
        # approximate comparison with a tolerance -- TODO confirm.
        def float_func(float_param):
            assert isinstance(float_param, float) and float_equal(float_param, 1.23, 1e-6), \
                'float_param is wrong value %s !' % float_param
            return float_param

        def double_func(double_param):
            assert isinstance(double_param, float) and float_equal(double_param, 1.98932, 1e-7), \
                'double_param is wrong value %s !' % double_param
            return double_param

        def bytes_func(bytes_param):
            assert bytes_param == b'flink', \
                'bytes_param is wrong value %s !' % bytes_param
            return bytes_param

        def str_func(str_param):
            assert str_param == 'pyflink', \
                'str_param is wrong value %s !' % str_param
            return str_param

        def date_func(date_param):
            from datetime import date
            assert date_param == date(year=2014, month=9, day=13), \
                'date_param is wrong value %s !' % date_param
            return date_param

        def time_func(time_param):
            from datetime import time
            assert time_param == time(hour=12, minute=0, second=0, microsecond=123000), \
                'time_param is wrong value %s !' % time_param
            return time_param

        def timestamp_func(timestamp_param):
            from datetime import datetime
            assert timestamp_param == datetime(2018, 3, 11, 3, 0, 0, 123000), \
                'timestamp_param is wrong value %s !' % timestamp_param
            return timestamp_param

        def array_func(array_param):
            assert array_param == [[1, 2, 3]], \
                'array_param is wrong value %s !' % array_param
            # Only the first inner list is returned (result type ARRAY<BIGINT>).
            return array_param[0]

        def map_func(map_param):
            assert map_param == {1: 'flink', 2: 'pyflink'}, \
                'map_param is wrong value %s !' % map_param
            return map_param

        def decimal_func(decimal_param):
            from decimal import Decimal
            assert decimal_param == Decimal('1000000000000000000.050000000000000000'), \
                'decimal_param is wrong value %s !' % decimal_param
            return decimal_param

        # Expects the input decimal already cut to 18 fractional digits.
        def decimal_cut_func(decimal_param):
            from decimal import Decimal
            assert decimal_param == Decimal('1000000000000000000.059999999999999999'), \
                'decimal_param is wrong value %s !' % decimal_param
            return decimal_param

        # --- Register every function under the name used in select(). ---

        self.t_env.create_temporary_system_function(
            "boolean_func", udf(boolean_func, result_type=DataTypes.BOOLEAN()))

        self.t_env.create_temporary_system_function(
            "tinyint_func", udf(tinyint_func, result_type=DataTypes.TINYINT()))

        self.t_env.create_temporary_system_function(
            "smallint_func",
            udf(smallint_func, result_type=DataTypes.SMALLINT()))

        self.t_env.create_temporary_system_function(
            "int_func", udf(int_func, result_type=DataTypes.INT()))

        self.t_env.create_temporary_system_function(
            "bigint_func", udf(bigint_func, result_type=DataTypes.BIGINT()))

        self.t_env.create_temporary_system_function(
            "bigint_func_none",
            udf(bigint_func_none, result_type=DataTypes.BIGINT()))

        self.t_env.create_temporary_system_function(
            "float_func", udf(float_func, result_type=DataTypes.FLOAT()))

        self.t_env.create_temporary_system_function(
            "double_func", udf(double_func, result_type=DataTypes.DOUBLE()))

        self.t_env.create_temporary_system_function(
            "bytes_func", udf(bytes_func, result_type=DataTypes.BYTES()))

        self.t_env.create_temporary_system_function(
            "str_func", udf(str_func, result_type=DataTypes.STRING()))

        self.t_env.create_temporary_system_function(
            "date_func", udf(date_func, result_type=DataTypes.DATE()))

        self.t_env.create_temporary_system_function(
            "time_func", udf(time_func, result_type=DataTypes.TIME()))

        self.t_env.create_temporary_system_function(
            "timestamp_func",
            udf(timestamp_func, result_type=DataTypes.TIMESTAMP(3)))

        self.t_env.create_temporary_system_function(
            "array_func",
            udf(array_func, result_type=DataTypes.ARRAY(DataTypes.BIGINT())))

        self.t_env.create_temporary_system_function(
            "map_func",
            udf(map_func,
                result_type=DataTypes.MAP(DataTypes.BIGINT(),
                                          DataTypes.STRING())))

        # NOTE(review): the two decimal UDFs go through register_function
        # while all others use create_temporary_system_function -- confirm
        # whether this difference is intentional before unifying.
        self.t_env.register_function(
            "decimal_func",
            udf(decimal_func, result_type=DataTypes.DECIMAL(38, 18)))

        self.t_env.register_function(
            "decimal_cut_func",
            udf(decimal_cut_func, result_type=DataTypes.DECIMAL(38, 18)))

        # Sink columns follow the order of the select expression below.
        table_sink = source_sink_utils.TestAppendSink([
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q'
        ], [
            DataTypes.BIGINT(),
            DataTypes.BIGINT(),
            DataTypes.TINYINT(),
            DataTypes.BOOLEAN(),
            DataTypes.SMALLINT(),
            DataTypes.INT(),
            DataTypes.FLOAT(),
            DataTypes.DOUBLE(),
            DataTypes.BYTES(),
            DataTypes.STRING(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
            DataTypes.ARRAY(DataTypes.BIGINT()),
            DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING()),
            DataTypes.DECIMAL(38, 18),
            DataTypes.DECIMAL(38, 18)
        ])
        self.t_env.register_table_sink("Results", table_sink)

        # Function-local imports: these bind the module names for the input
        # row below; the UDFs above import what they need themselves.
        import datetime
        import decimal
        # Single input row; values line up with the schema fields a..q.
        t = self.t_env.from_elements(
            [(1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
              bytearray(b'flink'), 'pyflink', datetime.date(2014, 9, 13),
              datetime.time(hour=12, minute=0, second=0, microsecond=123000),
              datetime.datetime(2018, 3, 11, 3, 0, 0, 123000), [[1, 2, 3]], {
                  1: 'flink',
                  2: 'pyflink'
              }, decimal.Decimal('1000000000000000000.05'),
              decimal.Decimal(
                  '1000000000000000000.05999999999999999899999999999'))],
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.BIGINT()),
                DataTypes.FIELD("b", DataTypes.BIGINT()),
                DataTypes.FIELD("c", DataTypes.TINYINT()),
                DataTypes.FIELD("d", DataTypes.BOOLEAN()),
                DataTypes.FIELD("e", DataTypes.SMALLINT()),
                DataTypes.FIELD("f", DataTypes.INT()),
                DataTypes.FIELD("g", DataTypes.FLOAT()),
                DataTypes.FIELD("h", DataTypes.DOUBLE()),
                DataTypes.FIELD("i", DataTypes.BYTES()),
                DataTypes.FIELD("j", DataTypes.STRING()),
                DataTypes.FIELD("k", DataTypes.DATE()),
                DataTypes.FIELD("l", DataTypes.TIME()),
                DataTypes.FIELD("m", DataTypes.TIMESTAMP(3)),
                DataTypes.FIELD(
                    "n", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.BIGINT()))),
                DataTypes.FIELD(
                    "o", DataTypes.MAP(DataTypes.BIGINT(),
                                       DataTypes.STRING())),
                DataTypes.FIELD("p", DataTypes.DECIMAL(38, 18)),
                DataTypes.FIELD("q", DataTypes.DECIMAL(38, 18))
            ]))

        # exec_insert_table is defined outside this block; presumably it runs
        # the insert into the named sink and waits for it -- TODO confirm.
        exec_insert_table(
            t.select("bigint_func(a), bigint_func_none(b),"
                     "tinyint_func(c), boolean_func(d),"
                     "smallint_func(e),int_func(f),"
                     "float_func(g),double_func(h),"
                     "bytes_func(i),str_func(j),"
                     "date_func(k),time_func(l),"
                     "timestamp_func(m),array_func(n),"
                     "map_func(o),decimal_func(p),"
                     "decimal_cut_func(q)"), "Results")
        actual = source_sink_utils.results()
        # Currently the sink result precision of DataTypes.TIME(precision) only supports 0.
        self.assert_equals(actual, [
            "1,null,1,true,32767,-2147483648,1.23,1.98932,"
            "[102, 108, 105, 110, 107],pyflink,2014-09-13,"
            "12:00:00,2018-03-11 03:00:00.123,[1, 2, 3],"
            "{1=flink, 2=pyflink},1000000000000000000.050000000000000000,"
            "1000000000000000000.059999999999999999"
        ])
Exemplo n.º 7
0
    def test_all_data_types(self):
        """Pass one row through a pandas UDF for each supported column type.

        Every nested UDF asserts that its argument is a ``pandas.Series`` whose
        first element has the expected type (``smallint_func``, ``int_func``
        and ``timestamp_func`` also check the value) and returns the series
        unchanged. The row collected from the ``Results`` sink is then compared
        with its expected comma-separated rendering.
        """
        import pandas as pd
        import numpy as np

        def tinyint_func(tinyint_param):
            assert isinstance(tinyint_param, pd.Series)
            first = tinyint_param[0]
            assert isinstance(first, np.int8), \
                'tinyint_param of wrong type %s !' % type(first)
            return tinyint_param

        def smallint_func(smallint_param):
            assert isinstance(smallint_param, pd.Series)
            first = smallint_param[0]
            assert isinstance(first, np.int16), \
                'smallint_param of wrong type %s !' % type(first)
            assert first == 32767, \
                'smallint_param of wrong value %s' % smallint_param
            return smallint_param

        def int_func(int_param):
            assert isinstance(int_param, pd.Series)
            first = int_param[0]
            assert isinstance(first, np.int32), \
                'int_param of wrong type %s !' % type(first)
            assert first == -2147483648, \
                'int_param of wrong value %s' % int_param
            return int_param

        def bigint_func(bigint_param):
            assert isinstance(bigint_param, pd.Series)
            first = bigint_param[0]
            assert isinstance(first, np.int64), \
                'bigint_param of wrong type %s !' % type(first)
            return bigint_param

        def boolean_func(boolean_param):
            assert isinstance(boolean_param, pd.Series)
            first = boolean_param[0]
            assert isinstance(first, np.bool_), \
                'boolean_param of wrong type %s !' % type(first)
            return boolean_param

        def float_func(float_param):
            assert isinstance(float_param, pd.Series)
            first = float_param[0]
            assert isinstance(first, np.float32), \
                'float_param of wrong type %s !' % type(first)
            return float_param

        def double_func(double_param):
            assert isinstance(double_param, pd.Series)
            first = double_param[0]
            assert isinstance(first, np.float64), \
                'double_param of wrong type %s !' % type(first)
            return double_param

        def varchar_func(varchar_param):
            assert isinstance(varchar_param, pd.Series)
            first = varchar_param[0]
            assert isinstance(first, str), \
                'varchar_param of wrong type %s !' % type(first)
            return varchar_param

        def varbinary_func(varbinary_param):
            assert isinstance(varbinary_param, pd.Series)
            first = varbinary_param[0]
            assert isinstance(first, bytes), \
                'varbinary_param of wrong type %s !' % type(first)
            return varbinary_param

        def decimal_func(decimal_param):
            assert isinstance(decimal_param, pd.Series)
            first = decimal_param[0]
            assert isinstance(first, decimal.Decimal), \
                'decimal_param of wrong type %s !' % type(first)
            return decimal_param

        def date_func(date_param):
            assert isinstance(date_param, pd.Series)
            first = date_param[0]
            assert isinstance(first, datetime.date), \
                'date_param of wrong type %s !' % type(first)
            return date_param

        def time_func(time_param):
            assert isinstance(time_param, pd.Series)
            first = time_param[0]
            assert isinstance(first, datetime.time), \
                'time_param of wrong type %s !' % type(first)
            return time_param

        # Shared between the input row and the check inside timestamp_func.
        expected_timestamp = datetime.datetime(1970, 1, 1, 0, 0, 0, 123000)

        def timestamp_func(timestamp_param):
            assert isinstance(timestamp_param, pd.Series)
            first = timestamp_param[0]
            assert isinstance(first, datetime.datetime), \
                'timestamp_param of wrong type %s !' % type(first)
            assert first == expected_timestamp, \
                'timestamp_param is wrong value %s, should be %s!' % (
                    first, expected_timestamp)
            return timestamp_param

        # (registered name, callable, factory producing the argument/result type).
        # A factory is used so that input and result get separate type instances.
        pandas_udfs = [
            ("tinyint_func", tinyint_func, DataTypes.TINYINT),
            ("smallint_func", smallint_func, DataTypes.SMALLINT),
            ("int_func", int_func, DataTypes.INT),
            ("bigint_func", bigint_func, DataTypes.BIGINT),
            ("boolean_func", boolean_func, DataTypes.BOOLEAN),
            ("float_func", float_func, DataTypes.FLOAT),
            ("double_func", double_func, DataTypes.DOUBLE),
            ("varchar_func", varchar_func, DataTypes.STRING),
            ("varbinary_func", varbinary_func, DataTypes.BYTES),
            ("decimal_func", decimal_func, lambda: DataTypes.DECIMAL(38, 18)),
            ("date_func", date_func, DataTypes.DATE),
            ("time_func", time_func, DataTypes.TIME),
            ("timestamp_func", timestamp_func, lambda: DataTypes.TIMESTAMP(3)),
        ]
        for registered_name, python_func, make_type in pandas_udfs:
            self.t_env.register_function(
                registered_name,
                udf(python_func, [make_type()], make_type(), udf_type="pandas"))

        columns = "abcdefghijklmnop"

        def column_types():
            # Fresh type instances, in column order a..p, for each caller.
            return [
                DataTypes.TINYINT(),
                DataTypes.SMALLINT(),
                DataTypes.INT(),
                DataTypes.BIGINT(),
                DataTypes.BOOLEAN(),
                DataTypes.BOOLEAN(),
                DataTypes.FLOAT(),
                DataTypes.DOUBLE(),
                DataTypes.STRING(),
                DataTypes.STRING(),
                DataTypes.BYTES(),
                DataTypes.DECIMAL(38, 18),
                DataTypes.DECIMAL(38, 18),
                DataTypes.DATE(),
                DataTypes.TIME(),
                DataTypes.TIMESTAMP(3),
            ]

        self.t_env.register_table_sink(
            "Results",
            source_sink_utils.TestAppendSink(list(columns), column_types()))

        input_row = (
            1, 32767, -2147483648, 1, True, False, 1.0, 1.0, 'hello', '中文',
            bytearray(b'flink'),
            decimal.Decimal('1000000000000000000.05'),
            decimal.Decimal(
                '1000000000000000000.05999999999999999899999999999'),
            datetime.date(2014, 9, 13),
            datetime.time(hour=1, minute=0, second=1),
            expected_timestamp,
        )
        input_schema = DataTypes.ROW([
            DataTypes.FIELD(column, column_type)
            for column, column_type in zip(columns, column_types())
        ])
        t = self.t_env.from_elements([input_row], input_schema)

        # UDF applied to each column, in column order a..p.
        column_udfs = [
            "tinyint_func", "smallint_func", "int_func", "bigint_func",
            "boolean_func", "boolean_func", "float_func", "double_func",
            "varchar_func", "varchar_func", "varbinary_func", "decimal_func",
            "decimal_func", "date_func", "time_func", "timestamp_func",
        ]
        projection = ",".join(
            "%s(%s)" % (func_name, column)
            for func_name, column in zip(column_udfs, columns))

        t.select(projection).insert_into("Results")
        self.t_env.execute("test")

        expected = [
            "1,32767,-2147483648,1,true,false,1.0,1.0,hello,中文,"
            "[102, 108, 105, 110, 107],1000000000000000000.050000000000000000,"
            "1000000000000000000.059999999999999999,2014-09-13,01:00:01,"
            "1970-01-01 00:00:00.123"
        ]
        self.assert_equals(source_sink_utils.results(), expected)
Exemplo n.º 8
0
    def test_all_data_types(self):
        """Pass one row through a scalar Python UDF per column type.

        Each nested UDF asserts the type and/or value of its argument and
        returns it unchanged; ``bigint_func_none`` expects ``None``. The row
        collected from the ``Results`` sink is compared with its expected
        comma-separated rendering.
        """
        def boolean_func(bool_param):
            assert isinstance(bool_param, bool), \
                'bool_param of wrong type %s !' % type(bool_param)
            return bool_param

        def tinyint_func(tinyint_param):
            assert isinstance(tinyint_param, int), \
                'tinyint_param of wrong type %s !' % type(tinyint_param)
            return tinyint_param

        def smallint_func(smallint_param):
            assert isinstance(smallint_param, int), \
                'smallint_param of wrong type %s !' % type(smallint_param)
            assert smallint_param == 32767, \
                'smallint_param of wrong value %s' % smallint_param
            return smallint_param

        def int_func(int_param):
            assert isinstance(int_param, int), \
                'int_param of wrong type %s !' % type(int_param)
            assert int_param == -2147483648, \
                'int_param of wrong value %s' % int_param
            return int_param

        def bigint_func(bigint_param):
            assert isinstance(bigint_param, int), \
                'bigint_param of wrong type %s !' % type(bigint_param)
            return bigint_param

        def bigint_func_none(bigint_param):
            assert bigint_param is None, \
                'bigint_param %s should be None!' % bigint_param
            return bigint_param

        def float_func(float_param):
            assert (isinstance(float_param, float)
                    and float_equal(float_param, 1.23, 1e-6)), \
                'float_param is wrong value %s !' % float_param
            return float_param

        def double_func(double_param):
            assert (isinstance(double_param, float)
                    and float_equal(double_param, 1.98932, 1e-7)), \
                'double_param is wrong value %s !' % double_param
            return double_param

        def bytes_func(bytes_param):
            assert bytes_param == b'flink', \
                'bytes_param is wrong value %s !' % bytes_param
            return bytes_param

        def str_func(str_param):
            assert str_param == 'pyflink', \
                'str_param is wrong value %s !' % str_param
            return str_param

        def date_func(date_param):
            from datetime import date
            wanted = date(year=2014, month=9, day=13)
            assert date_param == wanted, \
                'date_param is wrong value %s !' % date_param
            return date_param

        def time_func(time_param):
            from datetime import time
            wanted = time(hour=12, minute=0, second=0, microsecond=123000)
            assert time_param == wanted, \
                'time_param is wrong value %s !' % time_param
            return time_param

        def timestamp_func(timestamp_param):
            from datetime import datetime
            wanted = datetime(2018, 3, 11, 3, 0, 0, 123000)
            assert timestamp_param == wanted, \
                'timestamp_param is wrong value %s !' % timestamp_param
            return timestamp_param

        # (registered name, callable, factory producing the argument/result type).
        # A factory is used so that input and result get separate type instances.
        scalar_udfs = [
            ("boolean_func", boolean_func, DataTypes.BOOLEAN),
            ("tinyint_func", tinyint_func, DataTypes.TINYINT),
            ("smallint_func", smallint_func, DataTypes.SMALLINT),
            ("int_func", int_func, DataTypes.INT),
            ("bigint_func", bigint_func, DataTypes.BIGINT),
            ("bigint_func_none", bigint_func_none, DataTypes.BIGINT),
            ("float_func", float_func, DataTypes.FLOAT),
            ("double_func", double_func, DataTypes.DOUBLE),
            ("bytes_func", bytes_func, DataTypes.BYTES),
            ("str_func", str_func, DataTypes.STRING),
            ("date_func", date_func, DataTypes.DATE),
            ("time_func", time_func, lambda: DataTypes.TIME(3)),
            ("timestamp_func", timestamp_func, DataTypes.TIMESTAMP),
        ]
        for registered_name, python_func, make_type in scalar_udfs:
            self.t_env.register_function(
                registered_name,
                udf(python_func, [make_type()], make_type()))

        columns = "abcdefghijklm"

        def column_types():
            # Fresh type instances, in column order a..m, for each caller.
            return [
                DataTypes.BIGINT(),
                DataTypes.BIGINT(),
                DataTypes.TINYINT(),
                DataTypes.BOOLEAN(),
                DataTypes.SMALLINT(),
                DataTypes.INT(),
                DataTypes.FLOAT(),
                DataTypes.DOUBLE(),
                DataTypes.BYTES(),
                DataTypes.STRING(),
                DataTypes.DATE(),
                DataTypes.TIME(3),
                DataTypes.TIMESTAMP(),
            ]

        self.t_env.register_table_sink(
            "Results",
            source_sink_utils.TestAppendSink(list(columns), column_types()))

        import datetime
        input_row = (
            1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'flink'),
            'pyflink',
            datetime.date(2014, 9, 13),
            datetime.time(hour=12, minute=0, second=0, microsecond=123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
        )
        input_schema = DataTypes.ROW([
            DataTypes.FIELD(column, column_type)
            for column, column_type in zip(columns, column_types())
        ])
        t = self.t_env.from_elements([input_row], input_schema)

        projection = (
            "bigint_func(a), "
            "bigint_func_none(b),"
            "tinyint_func(c), "
            "boolean_func(d),"
            "smallint_func(e),"
            "int_func(f),"
            "float_func(g),"
            "double_func(h),"
            "bytes_func(i),"
            "str_func(j),"
            "date_func(k),"
            "time_func(l),"
            "timestamp_func(m)"
        )
        t.select(projection).insert_into("Results")
        self.t_env.execute("test")

        # The sink currently only supports precision 0 for
        # DataTypes.TIME(precision), so column l is expected as 12:00:00
        # without the 123 ms fraction of the input value.
        expected = [
            "1,null,1,true,32767,-2147483648,1.23,1.98932,"
            "[102, 108, 105, 110, 107],pyflink,2014-09-13,"
            "12:00:00,2018-03-11 03:00:00.123"
        ]
        self.assert_equals(source_sink_utils.results(), expected)
Exemplo n.º 9
0
    def test_from_element(self):
        """Insert one row built with ``from_elements`` and check the sink output.

        The row covers numeric, string, date/time, interval, array, decimal,
        row, map, bytes and ``PythonOnlyUDT`` columns. It is written to a
        ``Results`` table created through DDL and the collected result is
        compared with its expected rendering.
        """
        t_env = self.t_env
        field_names = list("abcdefghijklmnopq")
        field_types = [
            DataTypes.BIGINT(),
            DataTypes.DOUBLE(),
            DataTypes.STRING(),
            DataTypes.STRING(),
            DataTypes.DATE(),
            DataTypes.TIME(),
            DataTypes.TIMESTAMP(3),
            DataTypes.INTERVAL(DataTypes.SECOND(3)),
            DataTypes.ARRAY(DataTypes.DOUBLE()),
            DataTypes.ARRAY(DataTypes.DOUBLE(False)),
            DataTypes.ARRAY(DataTypes.STRING()),
            DataTypes.ARRAY(DataTypes.DATE()),
            DataTypes.DECIMAL(38, 18),
            DataTypes.ROW([
                DataTypes.FIELD("a", DataTypes.BIGINT()),
                DataTypes.FIELD("b", DataTypes.DOUBLE())
            ]),
            DataTypes.MAP(DataTypes.STRING(), DataTypes.DOUBLE()),
            DataTypes.BYTES(),
            PythonOnlyUDT()
        ]
        schema = DataTypes.ROW([
            DataTypes.FIELD(name, data_type)
            for name, data_type in zip(field_names, field_types)
        ])

        # The sink declares h as INT and q as ARRAY<DOUBLE NOT NULL>, while the
        # input schema uses an interval and a Python-only UDT for those columns.
        t_env.execute_sql("""
            CREATE TABLE Results(
            a BIGINT,
            b DOUBLE,
            c STRING,
            d STRING,
            e DATE,
            f TIME,
            g TIMESTAMP(3),
            h INT,
            i ARRAY<DOUBLE>,
            j ARRAY<DOUBLE NOT NULL>,
            k ARRAY<STRING>,
            l ARRAY<DATE>,
            m DECIMAL(38, 18),
            n ROW<a BIGINT, b DOUBLE>,
            o MAP<STRING, DOUBLE>,
            p BYTES,
            q ARRAY<DOUBLE NOT NULL>)
            WITH ('connector'='test-sink')
        """)

        input_row = (
            1,
            1.0,
            "hi",
            "hello",
            datetime.date(1970, 1, 2),
            datetime.time(1, 0, 0),
            datetime.datetime(1970, 1, 2, 0, 0),
            datetime.timedelta(days=1, microseconds=10),
            [1.0, None],
            array.array("d", [1.0, 2.0]),
            ["abc"],
            [datetime.date(1970, 1, 2)],
            Decimal(1),
            Row("a", "b")(1, 2.0),
            {"key": 1.0},
            bytearray(b'ABCD'),
            PythonOnlyPoint(3.0, 4.0),
        )
        t_env.from_elements([input_row], schema) \
            .execute_insert("Results") \
            .wait()

        self.assert_equals(source_sink_utils.results(), [
            '+I[1, 1.0, hi, hello, 1970-01-02, 01:00, 1970-01-02T00:00, '
            '86400, [1.0, null], [1.0, 2.0], [abc], [1970-01-02], '
            '1.000000000000000000, +I[1, 2.0], {key=1.0}, [65, 66, 67, 68], [3.0, 4.0]]'
        ])
Exemplo n.º 10
0
    def test_all_data_types(self):
        """Pass one row through a scalar Python UDF per column type.

        Each nested UDF asserts the type and/or value of its argument and
        returns it unchanged, except ``array_func`` which returns the first
        inner array and ``bigint_func_none`` which expects ``None``. The row is
        written to a ``Results`` table created through DDL and the collected
        result is compared with its expected rendering.
        """
        def boolean_func(bool_param):
            assert isinstance(bool_param, bool), \
                'bool_param of wrong type %s !' % type(bool_param)
            return bool_param

        def tinyint_func(tinyint_param):
            assert isinstance(tinyint_param, int), \
                'tinyint_param of wrong type %s !' % type(tinyint_param)
            return tinyint_param

        def smallint_func(smallint_param):
            assert isinstance(smallint_param, int), \
                'smallint_param of wrong type %s !' % type(smallint_param)
            assert smallint_param == 32767, \
                'smallint_param of wrong value %s' % smallint_param
            return smallint_param

        def int_func(int_param):
            assert isinstance(int_param, int), \
                'int_param of wrong type %s !' % type(int_param)
            assert int_param == -2147483648, \
                'int_param of wrong value %s' % int_param
            return int_param

        def bigint_func(bigint_param):
            assert isinstance(bigint_param, int), \
                'bigint_param of wrong type %s !' % type(bigint_param)
            return bigint_param

        def bigint_func_none(bigint_param):
            assert bigint_param is None, \
                'bigint_param %s should be None!' % bigint_param
            return bigint_param

        def float_func(float_param):
            assert (isinstance(float_param, float)
                    and float_equal(float_param, 1.23, 1e-6)), \
                'float_param is wrong value %s !' % float_param
            return float_param

        def double_func(double_param):
            assert (isinstance(double_param, float)
                    and float_equal(double_param, 1.98932, 1e-7)), \
                'double_param is wrong value %s !' % double_param
            return double_param

        def bytes_func(bytes_param):
            assert bytes_param == b'flink', \
                'bytes_param is wrong value %s !' % bytes_param
            return bytes_param

        def str_func(str_param):
            assert str_param == 'pyflink', \
                'str_param is wrong value %s !' % str_param
            return str_param

        def date_func(date_param):
            from datetime import date
            wanted = date(year=2014, month=9, day=13)
            assert date_param == wanted, \
                'date_param is wrong value %s !' % date_param
            return date_param

        def time_func(time_param):
            from datetime import time
            wanted = time(hour=12, minute=0, second=0, microsecond=123000)
            assert time_param == wanted, \
                'time_param is wrong value %s !' % time_param
            return time_param

        def timestamp_func(timestamp_param):
            from datetime import datetime
            wanted = datetime(2018, 3, 11, 3, 0, 0, 123000)
            assert timestamp_param == wanted, \
                'timestamp_param is wrong value %s !' % timestamp_param
            return timestamp_param

        def array_func(array_param):
            # The nested array is accepted either as a list or as a tuple.
            assert (array_param == [[1, 2, 3]]
                    or array_param == ((1, 2, 3),)), \
                'array_param is wrong value %s !' % array_param
            return array_param[0]

        def map_func(map_param):
            assert map_param == {1: 'flink', 2: 'pyflink'}, \
                'map_param is wrong value %s !' % map_param
            return map_param

        def decimal_func(decimal_param):
            from decimal import Decimal
            wanted = Decimal('1000000000000000000.050000000000000000')
            assert decimal_param == wanted, \
                'decimal_param is wrong value %s !' % decimal_param
            return decimal_param

        def decimal_cut_func(decimal_param):
            from decimal import Decimal
            wanted = Decimal('1000000000000000000.059999999999999999')
            assert decimal_param == wanted, \
                'decimal_param is wrong value %s !' % decimal_param
            return decimal_param

        # (registered name, callable, result type) for the UDFs registered as
        # temporary system functions.
        system_udfs = [
            ("boolean_func", boolean_func, DataTypes.BOOLEAN()),
            ("tinyint_func", tinyint_func, DataTypes.TINYINT()),
            ("smallint_func", smallint_func, DataTypes.SMALLINT()),
            ("int_func", int_func, DataTypes.INT()),
            ("bigint_func", bigint_func, DataTypes.BIGINT()),
            ("bigint_func_none", bigint_func_none, DataTypes.BIGINT()),
            ("float_func", float_func, DataTypes.FLOAT()),
            ("double_func", double_func, DataTypes.DOUBLE()),
            ("bytes_func", bytes_func, DataTypes.BYTES()),
            ("str_func", str_func, DataTypes.STRING()),
            ("date_func", date_func, DataTypes.DATE()),
            ("time_func", time_func, DataTypes.TIME()),
            ("timestamp_func", timestamp_func, DataTypes.TIMESTAMP(3)),
            ("array_func", array_func, DataTypes.ARRAY(DataTypes.BIGINT())),
            ("map_func", map_func,
             DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())),
        ]
        for registered_name, python_func, result_type in system_udfs:
            self.t_env.create_temporary_system_function(
                registered_name, udf(python_func, result_type=result_type))

        # NOTE(review): the two decimal UDFs go through register_function
        # instead of create_temporary_system_function; presumably intentional,
        # confirm before unifying.
        for registered_name, python_func in (
                ("decimal_func", decimal_func),
                ("decimal_cut_func", decimal_cut_func)):
            self.t_env.register_function(
                registered_name,
                udf(python_func, result_type=DataTypes.DECIMAL(38, 18)))

        self.t_env.execute_sql("""
            CREATE TABLE Results(
            a BIGINT, b BIGINT, c TINYINT, d BOOLEAN, e SMALLINT, f INT, g FLOAT, h DOUBLE,
            i BYTES, j STRING, k DATE, l TIME, m TIMESTAMP(3), n ARRAY<BIGINT>,
            o MAP<BIGINT, STRING>, p DECIMAL(38, 18), q DECIMAL(38, 18))
            WITH ('connector'='test-sink')
        """)

        import datetime
        import decimal
        input_row = (
            1, None, 1, True, 32767, -2147483648, 1.23, 1.98932,
            bytearray(b'flink'),
            'pyflink',
            datetime.date(2014, 9, 13),
            datetime.time(hour=12, minute=0, second=0, microsecond=123000),
            datetime.datetime(2018, 3, 11, 3, 0, 0, 123000),
            [[1, 2, 3]],
            {1: 'flink', 2: 'pyflink'},
            decimal.Decimal('1000000000000000000.05'),
            decimal.Decimal(
                '1000000000000000000.05999999999999999899999999999'),
        )
        input_columns = [
            ("a", DataTypes.BIGINT()),
            ("b", DataTypes.BIGINT()),
            ("c", DataTypes.TINYINT()),
            ("d", DataTypes.BOOLEAN()),
            ("e", DataTypes.SMALLINT()),
            ("f", DataTypes.INT()),
            ("g", DataTypes.FLOAT()),
            ("h", DataTypes.DOUBLE()),
            ("i", DataTypes.BYTES()),
            ("j", DataTypes.STRING()),
            ("k", DataTypes.DATE()),
            ("l", DataTypes.TIME()),
            ("m", DataTypes.TIMESTAMP(3)),
            ("n", DataTypes.ARRAY(DataTypes.ARRAY(DataTypes.BIGINT()))),
            ("o", DataTypes.MAP(DataTypes.BIGINT(), DataTypes.STRING())),
            ("p", DataTypes.DECIMAL(38, 18)),
            ("q", DataTypes.DECIMAL(38, 18)),
        ]
        input_schema = DataTypes.ROW([
            DataTypes.FIELD(column, column_type)
            for column, column_type in input_columns
        ])
        t = self.t_env.from_elements([input_row], input_schema)

        # (UDF name, input column) pairs, in output column order.
        udf_calls = [
            ("bigint_func", "a"), ("bigint_func_none", "b"),
            ("tinyint_func", "c"), ("boolean_func", "d"),
            ("smallint_func", "e"), ("int_func", "f"),
            ("float_func", "g"), ("double_func", "h"),
            ("bytes_func", "i"), ("str_func", "j"),
            ("date_func", "k"), ("time_func", "l"),
            ("timestamp_func", "m"), ("array_func", "n"),
            ("map_func", "o"), ("decimal_func", "p"),
            ("decimal_cut_func", "q"),
        ]
        projections = [
            call(func_name, getattr(t, column))
            for func_name, column in udf_calls
        ]
        t.select(*projections).execute_insert("Results").wait()

        # Column l is expected with its millisecond fraction (12:00:00.123).
        expected = [
            "+I[1, null, 1, true, 32767, -2147483648, 1.23, 1.98932, "
            "[102, 108, 105, 110, 107], pyflink, 2014-09-13, "
            "12:00:00.123, 2018-03-11T03:00:00.123, [1, 2, 3], "
            "{1=flink, 2=pyflink}, 1000000000000000000.050000000000000000, "
            "1000000000000000000.059999999999999999]"
        ]
        self.assert_equals(source_sink_utils.results(), expected)