def test_path(self):
    """A FileSystem descriptor with a path must emit the filesystem connector properties."""
    descriptor = FileSystem().path("/test.csv")
    self.assertEqual(
        {'connector.property-version': '1',
         'connector.type': 'filesystem',
         'connector.path': '/test.csv'},
        descriptor.to_properties())
# @Site :
# @File : TalbeAPI.py
# @Software: PyCharm
"""Word-count batch job built with the PyFlink Table API.

Reads one word per line from /tmp/input, groups by word, and writes
(word, count) pairs to /tmp/output as tab-separated CSV.
"""
from pyflink.dataset import ExecutionEnvironment
from pyflink.table import TableConfig, DataTypes, BatchTableEnvironment
from pyflink.table.descriptors import Schema, OldCsv, FileSystem

exec_env = ExecutionEnvironment.get_execution_environment()
exec_env.set_parallelism(3)
t_config = TableConfig()
t_evn = BatchTableEnvironment.create(exec_env, t_config)

# Source table: a single STRING column 'word' read from /tmp/input.
# FIX: descriptors must be instantiated (OldCsv(), Schema()), and the field
# type belongs inside .field(...) — the original passed DataTypes.STRING()
# as a second argument to with_format/with_schema, which is a TypeError.
t_evn.connect(FileSystem().path('/tmp/input')) \
    .with_format(OldCsv()
                 .field('word', DataTypes.STRING())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())) \
    .create_temporary_table('mySource')

# Sink table: tab-separated (word, count) rows written to /tmp/output.
# FIX: the original called FileSystem.path / OldCsv.field_delimiter /
# Schema.field on the classes themselves; they are instance methods and
# require an instantiated descriptor.
t_evn.connect(FileSystem().path('/tmp/output')) \
    .with_format(OldCsv()
                 .field_delimiter('\t')
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .with_schema(Schema()
                 .field('word', DataTypes.STRING())
                 .field('count', DataTypes.BIGINT())) \
    .create_temporary_table('mySink')

# Count occurrences per word and emit into the sink, then run the job.
t_evn.from_path('mySource').group_by('word').select(
    'word,count(1)').insert_into('mySink')
t_evn.execute('tutorial_job')