def test_batched_read(self):
  """Write RECORDS to one parquet shard, then read it back as arrow batches.

  NOTE(review): another method with this exact name appears later in the
  file; at class-definition time the later one shadows this one — confirm
  which version is intended to run.
  """
  with tempfile.NamedTemporaryFile() as dst:
    path = dst.name
    with TestPipeline() as p:
      # One shard with an empty template so the output is exactly `path`.
      _ = (
          p
          | Create(self.RECORDS, reshuffle=False)
          | WriteToParquet(
              path, self.SCHEMA, num_shards=1, shard_name_template=''))
    with TestPipeline() as p:
      # Batched read yields whole arrow tables rather than individual rows.
      readback = p | ReadFromParquetBatched(path)
      assert_that(readback, equal_to([self._records_as_arrow()]))
def test_batched_read(self):
  """Round-trip RECORDS through a parquet file inside a temp directory and
  read it back as arrow batches.

  Fix: the path was built with ``os.path.join(tmp_dirname + "tmp_filename")``
  — string concatenation inside a single-argument join — which produced a
  *sibling* of the temp directory (e.g. ``/tmp/xyztmp_filename``). That file
  lives outside the directory and is never removed by TemporaryDirectory's
  cleanup. Joining with a comma places it inside the directory as intended.
  """
  with TemporaryDirectory() as tmp_dirname:
    path = os.path.join(tmp_dirname, "tmp_filename")
    with TestPipeline() as p:
      # Single shard, empty template: the write lands exactly at `path`.
      _ = (
          p
          | Create(self.RECORDS, reshuffle=False)
          | WriteToParquet(
              path, self.SCHEMA, num_shards=1, shard_name_template=''))
    with TestPipeline() as p:
      # Batched read yields whole arrow tables rather than individual rows.
      readback = p | ReadFromParquetBatched(path)
      assert_that(readback, equal_to([self._records_as_arrow()]))
def test_read_display_data(self):
  """Both parquet read transforms report compression and file_pattern in
  their display data."""
  file_name = 'some_parquet_source'
  read = ReadFromParquet(file_name, validate=False)
  read_batched = ReadFromParquetBatched(file_name, validate=False)
  expected_items = [
      DisplayDataItemMatcher('compression', 'auto'),
      DisplayDataItemMatcher('file_pattern', file_name),
  ]
  # Same expectations for the row-wise and the batched transform.
  for transform in (read, read_batched):
    hc.assert_that(
        DisplayData.create_from(transform).items,
        hc.contains_inanyorder(*expected_items))