Пример #1
0
 def _build_csv_job(self, schema, lines):
     """Write ``lines`` to the CSV file and build a job that reads it back.

     The strings in ``lines`` are written to ``self.csv_file_name`` exactly
     as given; no line terminator is appended, so each entry must already
     end with one where a row break is wanted. The file is then read through
     a ``FileSource`` using a ``CsvReaderFormat`` derived from ``schema``,
     passed through ``PassThroughMapFunction`` and attached to
     ``self.test_sink``.

     This method only assembles the pipeline on ``self.env``; it does not
     execute it.

     :param schema: CSV schema handed to ``CsvReaderFormat.for_schema``.
     :param lines: Iterable of strings forming the content of the CSV file.
     """
     # Pin the encoding: without it open() falls back to the platform locale,
     # so non-ASCII test data would be written as different bytes on
     # different machines.
     with open(self.csv_file_name, 'w', encoding='utf-8') as f:
         f.writelines(lines)
     source = FileSource.for_record_stream_format(
         CsvReaderFormat.for_schema(schema), self.csv_file_name).build()
     ds = self.env.from_source(source, WatermarkStrategy.no_watermarks(),
                               'csv-source')
     # Records are forwarded unchanged, typed as pickled byte arrays, and
     # collected by the test sink.
     ds.map(PassThroughMapFunction(), output_type=Types.PICKLED_BYTE_ARRAY()) \
         .add_sink(self.test_sink)
Пример #2
0
 def _build_csv_job(self, schema: CsvSchema, lines):
     """Write ``lines`` to the CSV file and build a CSV round-trip job.

     The strings in ``lines`` are written to ``self.csv_file_name`` exactly
     as given; no line terminator is appended, so each entry must already
     end with one where a row break is wanted. The file is read through a
     ``FileSource`` using a ``CsvReaderFormat`` derived from ``schema``, and
     the resulting stream is written to ``self.csv_dir_name`` by a
     ``FileSink`` whose bulk writer is built from the same ``schema``.

     This method only assembles the pipeline on ``self.env``; it does not
     execute it.

     :param schema: CSV schema used for both the reader format and the
         bulk writer.
     :param lines: Iterable of strings forming the content of the CSV file.
     """
     # Pin the encoding: without it open() falls back to the platform locale,
     # so non-ASCII test data would be written as different bytes on
     # different machines.
     with open(self.csv_file_name, 'w', encoding='utf-8') as f:
         f.writelines(lines)
     source = FileSource.for_record_stream_format(
         CsvReaderFormat.for_schema(schema), self.csv_file_name).build()
     ds = self.env.from_source(source, WatermarkStrategy.no_watermarks(),
                               'csv-source')
     # The same schema drives the writer, so output rows are laid out like
     # the input rows.
     sink = FileSink.for_bulk_format(
         self.csv_dir_name, CsvBulkWriters.for_schema(schema)).build()
     ds.sink_to(sink)