def test_multiple_files(self):
    """Forces records to be written to many files per destination.

    The max file size is set very small (50 bytes), so only a couple of
    records fit in each file and every destination needs multiple files.
    Verifies the expected file count per destination and that each file
    actually exists on disk.
    """
    fn = bqfl.WriteGroupedRecordsToFile(
        max_file_size=50, coder=CustomRowCoder())
    self.tmpdir = self._new_tempdir()

    def check_multiple_files(output_pc):
        # output_pc elements are (destination, file_path) pairs.
        files_per_dest = output_pc | beam.combiners.Count.PerKey()
        files_per_dest = (
            files_per_dest
            | "GetDests" >> beam.Map(
                lambda x: (
                    bigquery_tools.get_hashable_destination(x[0]), x[1])))
        assert_that(
            files_per_dest,
            equal_to([
                ('project1:dataset1.table1', 4),
                ('project1:dataset1.table2', 2),
                ('project1:dataset1.table3', 1),
            ]))

        # Check that the files exist. The original version computed
        # os.path.exists but never asserted on the result, so missing
        # files would have passed silently; assert explicitly instead.
        _ = (
            output_pc
            | "GetFilePaths" >> beam.Map(lambda x: x[1])
            | "FilesExist" >> beam.Map(
                lambda x: hamcrest_assert(os.path.exists(x), is_(True))))

    self._consume_input(fn, _DESTINATION_ELEMENT_PAIRS, check_multiple_files)
def test_files_are_created(self):
    """Test that the output files are created and written.

    Checks three properties of the (destination, file_path) output pairs:
    each file exists on disk, exactly three files were produced, and the
    set of destinations matches the expected distinct destinations.
    """
    fn = bqfl.WriteGroupedRecordsToFile(coder=CustomRowCoder())
    self.tmpdir = self._new_tempdir()

    def check_files_created(output_pc):
        files = output_pc | "GetFiles" >> beam.Map(lambda x: x[1])
        file_count = files | "CountFiles" >> beam.combiners.Count.Globally()

        # Assert per-element that every emitted file path exists.
        _ = files | "FilesExist" >> beam.Map(
            lambda x: hamcrest_assert(os.path.exists(x), is_(True)))
        assert_that(file_count, equal_to([3]), label='check file count')

        destinations = (
            output_pc
            | "GetDests" >> beam.Map(
                lambda x: bigquery_tools.get_hashable_destination(x[0])))
        # Label previously had a stray trailing space ('check destinations ').
        assert_that(
            destinations,
            equal_to(list(_DISTINCT_DESTINATIONS)),
            label='check destinations')

    self._consume_input(fn, _DESTINATION_ELEMENT_PAIRS, check_files_created)