Exemplo n.º 1
0
    def test_field(self):
        """Check the properties produced by an OldCsv format with three fields.

        Two fields are declared with ``DataTypes`` objects and one with the
        legacy type string ``"SQL_TIMESTAMP"``; the resulting property map is
        compared against the expected ``format.*`` entries.
        """
        descriptor = OldCsv()
        field_specs = (
            ("a", DataTypes.BIGINT()),
            ("b", DataTypes.STRING()),
            ("c", "SQL_TIMESTAMP"),
        )
        for field_name, field_type in field_specs:
            descriptor.field(field_name, field_type)

        actual = descriptor.to_properties()

        expected = {
            'format.fields.0.name': 'a',
            'format.fields.0.data-type': 'BIGINT',
            'format.fields.1.name': 'b',
            'format.fields.1.data-type': 'VARCHAR(2147483647)',
            'format.fields.2.name': 'c',
            'format.fields.2.data-type': 'TIMESTAMP(3)',
            'format.type': 'csv',
            'format.property-version': '1',
        }
        self.assertEqual(expected, actual)
Exemplo n.º 2
0
# @Author : mh
# @Site :
# @File : TalbeAPI.py
# @Software: PyCharm

from pyflink.dataset import ExecutionEnvironment
from pyflink.table import TableConfig, DataTypes, BatchTableEnvironment
from pyflink.table.descriptors import Schema, OldCsv, FileSystem

# Batch word-count job: read a single 'word' column from /tmp/input, count the
# rows per word, and write (word, count) rows to /tmp/output.
exec_env = ExecutionEnvironment.get_execution_environment()
exec_env.set_parallelism(3)
t_config = TableConfig()
t_evn = BatchTableEnvironment.create(exec_env, t_config)

# Source table. The descriptors are used as instances (FileSystem(), OldCsv(),
# Schema()), and the data type is the second argument of field(), not an extra
# argument of with_format()/with_schema().
t_evn.connect(FileSystem().path('/tmp/input')) \
    .with_format(OldCsv()
                 .field('word', DataTypes.STRING())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())) \
    .create_temporary_table('mySource')

# Sink table: tab-delimited output with the word and its count.
t_evn.connect(FileSystem().path('/tmp/output')) \
    .with_format(OldCsv()
                 .field_delimiter('\t')
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .create_temporary_table('mySink')

# Group by word, count each group, and route the result into the sink.
t_evn.from_path('mySource').group_by('word').select(
    'word,count(1)').insert_into('mySink')
t_evn.execute('tutorial_job')