Exemplo n.º 1
0
    def test_schema(self):
        """Check the properties emitted by a ``Schema`` built from a ``TableSchema``."""
        column_names = ["a", "b"]
        column_types = [DataTypes.INT(), DataTypes.STRING()]
        source_schema = TableSchema(column_names, column_types)

        descriptor = Schema().schema(source_schema)

        # One name/data-type pair is expected per column, keyed by column position.
        expected = {
            'schema.0.name': 'a',
            'schema.0.data-type': 'INT',
            'schema.1.name': 'b',
            'schema.1.data-type': 'VARCHAR(2147483647)',
        }
        self.assertEqual(expected, descriptor.to_properties())
Exemplo n.º 2
0
    def get_schema(self) -> TableSchema:
        """
        Return the schema of this table or view.

        The value comes from calling ``getSchema()`` on the wrapped
        ``_j_catalog_base_table`` object and is returned wrapped in a ``TableSchema``.

        :return: Schema of the table/view.

        .. note:: Deprecated in 1.14. This method returns the deprecated TableSchema class. The
                  old class was a hybrid of resolved and unresolved schema information. It has
                  been replaced by the new Schema which is always unresolved and will be
                  resolved by the framework later.
        """
        j_schema = self._j_catalog_base_table.getSchema()
        return TableSchema(j_table_schema=j_schema)
Exemplo n.º 3
0
    def test_schema():
        """Check the properties emitted by a ``Schema`` configured from a ``TableSchema``."""
        descriptor = Schema()
        source_schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])

        actual = descriptor.schema(source_schema).to_properties()

        # One name/type pair is expected per column, keyed by column position.
        wanted = {
            'schema.0.name': 'a',
            'schema.0.type': 'INT',
            'schema.1.name': 'b',
            'schema.1.type': 'VARCHAR',
        }
        assert actual == wanted
Exemplo n.º 4
0
    def test_get_field_data_type(self):
        """Look up field data types by name and by index, including misses and a ``None`` key."""
        table_schema = TableSchema(
            ["a", "b", "c"],
            [DataTypes.INT(), DataTypes.BIGINT(), DataTypes.STRING()])
        lookup = table_schema.get_field_data_type

        found_by_name = lookup("b")
        found_by_index = lookup(2)
        missing_name = lookup("d")
        missing_index = lookup(6)
        # A ``None`` key is expected to raise TypeError rather than return None.
        with self.assertRaises(TypeError):
            lookup(None)

        # Hits return the declared type of the addressed column.
        self.assertEqual(DataTypes.BIGINT(), found_by_name)
        self.assertEqual(DataTypes.STRING(), found_by_index)
        # Unknown names and out-of-range indexes yield None.
        self.assertIsNone(missing_name)
        self.assertIsNone(missing_index)
Exemplo n.º 5
0
    def test_get_schema(self):
        """Check the schema reported for a grouped, aggregated table."""
        env = self.t_env
        rows = [(1, 'Hi', 'Hello'), (2, 'Hello', 'Hello'), (2, 'Hello', 'Hello')]
        source = env.from_elements(rows, ['a', 'b', 'c'])

        # Register a sink named "Results" with the same two-column layout.
        sink_fields = ["a", "b"]
        sink_types = [DataTypes.BIGINT(), DataTypes.STRING()]
        env.register_table_sink(
            "Results", sink_fields, sink_types, source_sink_utils.TestRetractSink())

        aggregated = source.group_by("c").select("a.sum as a, c as b")
        actual = aggregated.get_schema()

        assert actual == TableSchema(["a", "b"], [DataTypes.BIGINT(), DataTypes.STRING()])
Exemplo n.º 6
0
    def test_schema(self):
        """Check the properties emitted by ``OldCsv`` once a field schema is attached."""
        descriptor = OldCsv()
        field_schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])

        actual = descriptor.schema(field_schema).to_properties()

        # Per-field name/data-type entries plus the format type and property version.
        expected = {
            'format.fields.0.name': 'a',
            'format.fields.0.data-type': 'INT',
            'format.fields.1.name': 'b',
            'format.fields.1.data-type': 'VARCHAR(2147483647)',
            'format.type': 'csv',
            'format.property-version': '1',
        }
        self.assertEqual(expected, actual)
Exemplo n.º 7
0
    def test_schema():
        """Check the properties emitted by ``OldCsv`` once a field schema is attached."""
        descriptor = OldCsv()
        field_schema = TableSchema(["a", "b"], [DataTypes.INT(), DataTypes.STRING()])

        actual = descriptor.schema(field_schema).to_properties()

        # Per-field name/type entries plus the format type and property version.
        wanted = {
            'format.fields.0.name': 'a',
            'format.fields.0.type': 'INT',
            'format.fields.1.name': 'b',
            'format.fields.1.type': 'VARCHAR',
            'format.type': 'csv',
            'format.property-version': '1',
        }
        assert actual == wanted
Exemplo n.º 8
0
def table_java_inference():
    """Run the Java-inference example as a Flink table job.

    Steps, in order: prepare the data, start the testing server returned by
    ``start_zk_server()``, generate the model, register a ``TFRTableSource``
    over two TFRecord files, extract the ``image`` and ``org_label`` columns
    through the registered table function, run
    ``tensorflow_on_flink_table.inference`` with two workers, and write the
    output table to a ``LogInferAccSink`` before executing the stream job.

    The testing server is stopped in a ``finally`` clause, so it is shut down
    even when any step after its start-up raises; the exception itself still
    propagates to the caller.
    """
    prep_data()
    testing_server = start_zk_server()
    try:
        generate_model()
        stream_env = StreamExecutionEnvironment.get_execution_environment()
        table_env = TableEnvironment.get_table_environment(stream_env)
        stream_env.set_parallelism(2)

        # Source table over the two TFRecord files.
        train_data_path = "file://" + get_root_path(
        ) + "/examples/target/data/test/"
        paths = [
            train_data_path + "0.tfrecords", train_data_path + "1.tfrecords"
        ]
        src_row_type = RowType([StringType(), IntegerType()],
                               ["image_raw", "label"])
        table_src = TFRTableSource(
            paths=paths,
            epochs=1,
            out_row_type=src_row_type,
            converters=[ScalarConverter.FIRST, ScalarConverter.ONE_HOT])
        tfr_tbl_name = "tfr_input_table"
        table_env.register_table_source(tfr_tbl_name, table_src)

        # Extraction step: apply the registered table function to every
        # source row via a LATERAL TABLE join.
        ext_func_name = "tfr_extract"
        table_env.register_function(ext_func_name,
                                    java_inference_extract_func())
        out_cols = 'image,org_label'
        in_cols = ','.join(src_row_type.fields_names)
        extracted = table_env.sql_query(
            'select {} from {}, LATERAL TABLE({}({})) as T({})'.format(
                out_cols, tfr_tbl_name, ext_func_name, in_cols, out_cols))

        # Output schema of the inference table: two long columns.
        builder = TableSchema.Builder()
        builder.column(name='real_label',
                       data_type=LongType()).column(name='predicted_label',
                                                    data_type=LongType())
        output_schema = builder.build()

        # Inference properties; ``export_path`` is read from module scope.
        props = build_props('0')
        props[TF_INFERENCE_EXPORT_PATH] = export_path
        props[TF_INFERENCE_INPUT_TENSOR_NAMES] = 'image'
        props[TF_INFERENCE_OUTPUT_TENSOR_NAMES] = 'prediction'
        props[TF_INFERENCE_OUTPUT_ROW_FIELDS] = ','.join(
            ['org_label', 'prediction'])

        output_table = tensorflow_on_flink_table.inference(
            num_worker=2,
            properties=props,
            stream_env=stream_env,
            table_env=table_env,
            input_table=extracted,
            output_schema=output_schema)
        output_table.write_to_sink(LogInferAccSink())
        table_env.generate_stream_graph()
        stream_env.execute()
    finally:
        # Always release the testing server, including on failure paths.
        testing_server.stop()
Exemplo n.º 9
0
    def get_table_schema(self):
        """
        Return the schema of this result.

        The value comes from calling ``getTableSchema()`` on the wrapped
        ``_j_table_result`` object and is returned wrapped in a ``TableSchema``.
        The layout depends on the kind of statement that produced the result.

        The schema of DDL, USE, EXPLAIN:
        ::

            +-------------+-------------+----------+
            | column name | column type | comments |
            +-------------+-------------+----------+
            | result      | STRING      |          |
            +-------------+-------------+----------+

        The schema of SHOW:
        ::

            +---------------+-------------+----------+
            |  column name  | column type | comments |
            +---------------+-------------+----------+
            | <object name> | STRING      |          |
            +---------------+-------------+----------+

        The column name of `SHOW CATALOGS` is "catalog name",
        the column name of `SHOW DATABASES` is "database name",
        the column name of `SHOW TABLES` is "table name",
        the column name of `SHOW VIEWS` is "view name",
        the column name of `SHOW FUNCTIONS` is "function name".

        The schema of DESCRIBE:
        ::

            +------------------+-------------+-------------------------------------------------+
            | column name      | column type |                 comments                        |
            +------------------+-------------+-------------------------------------------------+
            | name             | STRING      | field name                                      |
            +------------------+-------------+-------------------------------------------------+
            | type             | STRING      | field type expressed as a String                |
            +------------------+-------------+-------------------------------------------------+
            | null             | BOOLEAN     | field nullability: true if a field is nullable, |
            |                  |             | else false                                      |
            +------------------+-------------+-------------------------------------------------+
            | key              | BOOLEAN     | key constraint: 'PRI' for primary keys,         |
            |                  |             | 'UNQ' for unique keys, else null                |
            +------------------+-------------+-------------------------------------------------+
            | computed column  | STRING      | computed column: string expression              |
            |                  |             | if a field is computed column, else null        |
            +------------------+-------------+-------------------------------------------------+
            | watermark        | STRING      | watermark: string expression if a field is      |
            |                  |             | watermark, else null                            |
            +------------------+-------------+-------------------------------------------------+

        The schema of INSERT: (one column per one sink)
        ::

            +----------------------------+-------------+-----------------------+
            | column name                | column type | comments              |
            +----------------------------+-------------+-----------------------+
            | (name of the insert table) | BIGINT      | the insert table name |
            +----------------------------+-------------+-----------------------+

        The schema of SELECT is the selected field names and types.

        :return: The schema of result.
        :rtype: pyflink.table.TableSchema

        .. versionadded:: 1.11.0
        """
        # NOTE(review): the DESCRIBE table above types the ``key`` column as BOOLEAN while
        # describing string values ('PRI'/'UNQ') - confirm against the Java TableResult docs.
        j_schema = self._j_table_result.getTableSchema()
        return TableSchema(j_table_schema=j_schema)
Exemplo n.º 10
0
    def test_repr(self):
        """Check the ``repr`` of a three-column ``TableSchema``."""
        column_names = ["a", "b", "c"]
        column_types = [DataTypes.INT(), DataTypes.BIGINT(), DataTypes.STRING()]
        table_schema = TableSchema(column_names, column_types)

        # One line for the root, then one " |-- name: TYPE" line per column.
        expected = ("root\n"
                    " |-- a: INT\n"
                    " |-- b: BIGINT\n"
                    " |-- c: STRING\n")
        self.assertEqual(expected, repr(table_schema))
Exemplo n.º 11
0
    def test_get_schema(self):
        """Check the schema reported for a grouped, aggregated single-row table."""
        source = self.t_env.from_elements([(1, 'Hi', 'Hello')], ['a', 'b', 'c'])
        aggregated = source.group_by("c").select("a.sum as a, c as b")

        actual = aggregated.get_schema()

        # The summed column is expected as BIGINT and the grouping column as STRING.
        assert actual == TableSchema(["a", "b"], [DataTypes.BIGINT(), DataTypes.STRING()])