Exemplo n.º 1
0
 def _build_parquet_columnar_job(self, row_type: RowType):
     """Wire a Parquet columnar file source to the test sink.

     Builds a bulk-format ``FileSource`` over ``self.parquet_file_name``,
     registers it on ``self.env`` under the name ``'parquet-source'`` with
     no watermarks, and forwards each element through an identity map into
     ``self.test_sink``.

     :param row_type: Row type handed to ``ParquetColumnarRowInputFormat``.
     """
     # NOTE(review): 10 / True / False are presumably the batch size and the
     # UTC-timestamp / case-sensitivity flags -- confirm against the
     # ParquetColumnarRowInputFormat signature of the PyFlink version in use.
     input_format = ParquetColumnarRowInputFormat(row_type, Configuration(), 10, True, False)
     file_source = FileSource.for_bulk_file_format(
         input_format, self.parquet_file_name
     ).build()
     stream = self.env.from_source(
         file_source,
         WatermarkStrategy.no_watermarks(),
         'parquet-source',
     )
     identity_mapped = stream.map(lambda e: e)
     identity_mapped.add_sink(self.test_sink)
Exemplo n.º 2
0
 def _build_csv_job(self, schema):
     """Wire a CSV file source to the test sink.

     Builds a record-stream ``FileSource`` over ``self.csv_file_name`` using
     a ``CsvReaderFormat`` derived from ``schema``, registers it on
     ``self.env`` under the name ``'csv-source'`` with no watermarks, and
     maps every record through ``PassThroughMapFunction`` (declared output
     type: pickled byte array) into ``self.test_sink``.

     :param schema: Schema passed to ``CsvReaderFormat.for_schema``.
     """
     reader_format = CsvReaderFormat.for_schema(schema)
     file_source = FileSource.for_record_stream_format(
         reader_format, self.csv_file_name
     ).build()
     stream = self.env.from_source(
         file_source,
         WatermarkStrategy.no_watermarks(),
         'csv-source',
     )
     mapped = stream.map(
         PassThroughMapFunction(),
         output_type=Types.PICKLED_BYTE_ARRAY(),
     )
     mapped.add_sink(self.test_sink)
Exemplo n.º 3
0
 def _build_parquet_columnar_job(self, row_type: RowType,
                                 parquet_file_name: str):
     """Wire a Parquet columnar file source to the test sink.

     Builds a bulk-format ``FileSource`` over ``parquet_file_name``,
     registers it on ``self.env`` under the name ``'parquet-source'`` with
     no watermarks, and maps every element through
     ``PassThroughMapFunction`` into ``self.test_sink``.

     :param row_type: Row type handed to ``ParquetColumnarRowInputFormat``.
     :param parquet_file_name: Path given to the file source.
     """
     # NOTE(review): 10 / True / True are presumably the batch size and the
     # UTC-timestamp / case-sensitivity flags -- confirm against the
     # ParquetColumnarRowInputFormat signature of the PyFlink version in use.
     input_format = ParquetColumnarRowInputFormat(
         Configuration(), row_type, 10, True, True)
     file_source = FileSource.for_bulk_file_format(
         input_format, parquet_file_name
     ).build()
     stream = self.env.from_source(
         file_source,
         WatermarkStrategy.no_watermarks(),
         'parquet-source',
     )
     mapped = stream.map(PassThroughMapFunction())
     mapped.add_sink(self.test_sink)
Exemplo n.º 4
0
 def test_no_watermarks(self):
     """Check the generator type produced by the no-watermarks strategy.

     Asserts that the Java strategy behind
     ``WatermarkStrategy.no_watermarks()`` creates a watermark generator
     that is an instance of ``NoWatermarksGenerator``.
     """
     gateway_jvm = get_gateway().jvm
     j_strategy = WatermarkStrategy.no_watermarks()._j_watermark_strategy

     # The generator is created with a None context, as in the original test.
     j_generator = j_strategy.createWatermarkGenerator(None)
     expected_class = (
         gateway_jvm.org.apache.flink.api.common.eventtime.NoWatermarksGenerator
     )
     self.assertTrue(is_instance_of(j_generator, expected_class))
Exemplo n.º 5
0
 def test_with_idleness(self):
     """Check that ``with_idleness`` wraps the strategy and keeps the timeout.

     Asserts that the resulting Java strategy is an instance of
     ``WatermarkStrategyWithIdleness`` and that its ``idlenessTimeout``
     field equals 5000 milliseconds for a five-second duration.
     """
     gateway_jvm = get_gateway().jvm

     base_strategy = WatermarkStrategy.no_watermarks()
     idle_strategy = base_strategy.with_idleness(Duration.of_seconds(5))
     j_strategy = idle_strategy._j_watermark_strategy

     expected_class = (
         gateway_jvm.org.apache.flink.api.common.eventtime
         .WatermarkStrategyWithIdleness
     )
     self.assertTrue(is_instance_of(j_strategy, expected_class))

     timeout_millis = get_field_value(j_strategy, "idlenessTimeout").toMillis()
     self.assertEqual(timeout_millis, 5000)
Exemplo n.º 6
0
 def test_with_watermark_alignment(self):
     """Check that ``with_watermark_alignment`` records its parameters.

     Asserts that the resulting Java strategy is an instance of
     ``WatermarksWithWatermarkAlignment`` and that its alignment parameters
     report the group ``"alignment-group-1"``, a maximum allowed drift of
     20000 and an update interval of 10000.
     """
     gateway_jvm = get_gateway().jvm

     base_strategy = WatermarkStrategy.no_watermarks()
     aligned_strategy = base_strategy.with_watermark_alignment(
         "alignment-group-1",
         Duration.of_seconds(20),
         Duration.of_seconds(10),
     )
     j_strategy = aligned_strategy._j_watermark_strategy

     expected_class = (
         gateway_jvm.org.apache.flink.api.common.eventtime
         .WatermarksWithWatermarkAlignment
     )
     self.assertTrue(is_instance_of(j_strategy, expected_class))

     params = j_strategy.getAlignmentParameters()
     self.assertEqual(params.getWatermarkGroup(), "alignment-group-1")
     self.assertEqual(params.getMaxAllowedWatermarkDrift(), 20000)
     self.assertEqual(params.getUpdateInterval(), 10000)