def _build_parquet_columnar_job(self, row_type: RowType):
    """Wire a parquet bulk-format file source into ``self.test_sink``.

    The source reads ``self.parquet_file_name`` using a
    ``ParquetColumnarRowInputFormat`` built from ``row_type`` and a fresh
    ``Configuration``. Records pass through an identity map before they
    reach the sink. Nothing is executed here; the pipeline is only
    assembled on ``self.env``.

    :param row_type: Row type handed to the parquet input format.
    """
    # NOTE(review): the trailing positional arguments (10, True, False) are
    # forwarded untouched; presumably batch size plus two boolean reader
    # flags -- confirm against ParquetColumnarRowInputFormat.
    input_format = ParquetColumnarRowInputFormat(row_type, Configuration(), 10, True, False)
    file_source = FileSource.for_bulk_file_format(input_format, self.parquet_file_name).build()
    stream = self.env.from_source(
        file_source,
        WatermarkStrategy.no_watermarks(),
        'parquet-source',
    )
    passed_through = stream.map(lambda record: record)
    passed_through.add_sink(self.test_sink)
def _build_csv_job(self, schema):
    """Wire a CSV record-stream file source into ``self.test_sink``.

    The source reads ``self.csv_file_name`` with a ``CsvReaderFormat``
    derived from ``schema``. Each record goes through
    ``PassThroughMapFunction`` with a pickled-byte-array output type before
    being handed to the sink. The pipeline is only assembled on
    ``self.env``; it is not executed here.

    :param schema: Schema object passed to ``CsvReaderFormat.for_schema``.
    """
    csv_format = CsvReaderFormat.for_schema(schema)
    file_source = FileSource.for_record_stream_format(csv_format, self.csv_file_name).build()
    stream = self.env.from_source(
        file_source,
        WatermarkStrategy.no_watermarks(),
        'csv-source',
    )
    passed_through = stream.map(
        PassThroughMapFunction(),
        output_type=Types.PICKLED_BYTE_ARRAY(),
    )
    passed_through.add_sink(self.test_sink)
def _build_parquet_columnar_job(self, row_type: RowType, parquet_file_name: str):
    """Wire a parquet bulk-format file source into ``self.test_sink``.

    The source reads ``parquet_file_name`` using a
    ``ParquetColumnarRowInputFormat``. Records go through
    ``PassThroughMapFunction`` before they reach the sink. The pipeline is
    only assembled on ``self.env``; it is not executed here.

    :param row_type: Row type handed to the parquet input format.
    :param parquet_file_name: Path of the parquet file to read.
    """
    # NOTE(review): this call passes Configuration() *before* row_type and
    # forwards (10, True, True) positionally; verify the argument order and
    # flag meanings against the ParquetColumnarRowInputFormat version in use.
    input_format = ParquetColumnarRowInputFormat(Configuration(), row_type, 10, True, True)
    file_source = FileSource.for_bulk_file_format(input_format, parquet_file_name).build()
    stream = self.env.from_source(
        file_source,
        WatermarkStrategy.no_watermarks(),
        'parquet-source',
    )
    passed_through = stream.map(PassThroughMapFunction())
    passed_through.add_sink(self.test_sink)
def test_no_watermarks(self):
    """Check that ``no_watermarks()`` yields a ``NoWatermarksGenerator``.

    The wrapped Java strategy is asked to create a watermark generator
    (with ``None`` as its argument) and the result is type-checked against
    the Java ``NoWatermarksGenerator`` class.
    """
    jvm = get_gateway().jvm
    strategy = WatermarkStrategy.no_watermarks()
    j_strategy = strategy._j_watermark_strategy

    generator = j_strategy.createWatermarkGenerator(None)
    expected_class = jvm.org.apache.flink.api.common.eventtime.NoWatermarksGenerator
    self.assertTrue(is_instance_of(generator, expected_class))
def test_with_idleness(self):
    """Check that ``with_idleness`` wraps the strategy and keeps the timeout.

    A 5-second idleness is applied to a no-watermarks strategy. The
    underlying Java object must be a ``WatermarkStrategyWithIdleness`` whose
    ``idlenessTimeout`` field reports 5000 via ``toMillis()``.
    """
    jvm = get_gateway().jvm
    idle_strategy = WatermarkStrategy.no_watermarks().with_idleness(Duration.of_seconds(5))
    j_strategy = idle_strategy._j_watermark_strategy

    expected_class = jvm.org.apache.flink.api.common.eventtime.WatermarkStrategyWithIdleness
    self.assertTrue(is_instance_of(j_strategy, expected_class))

    timeout = get_field_value(j_strategy, "idlenessTimeout")
    self.assertEqual(timeout.toMillis(), 5000)
def test_with_watermark_alignment(self):
    """Check that ``with_watermark_alignment`` forwards its parameters.

    Alignment is configured with group ``"alignment-group-1"`` and two
    durations of 20 s and 10 s. The underlying Java object must be a
    ``WatermarksWithWatermarkAlignment`` whose alignment parameters report
    the same group, a max allowed drift of 20000 and an update interval of
    10000.
    """
    jvm = get_gateway().jvm
    aligned_strategy = WatermarkStrategy.no_watermarks().with_watermark_alignment(
        "alignment-group-1",
        Duration.of_seconds(20),
        Duration.of_seconds(10),
    )
    j_strategy = aligned_strategy._j_watermark_strategy

    expected_class = jvm.org.apache.flink.api.common.eventtime.WatermarksWithWatermarkAlignment
    self.assertTrue(is_instance_of(j_strategy, expected_class))

    params = j_strategy.getAlignmentParameters()
    self.assertEqual(params.getWatermarkGroup(), "alignment-group-1")
    self.assertEqual(params.getMaxAllowedWatermarkDrift(), 20000)
    self.assertEqual(params.getUpdateInterval(), 10000)