def test_backfill(bq, gcs_partitioned_data, gcs_truncating_load_config,
                  gcs_bucket, dest_partitioned_table):
    """Backfill-CLI adaptation of test_load_job_partitioned.

    Instead of invoking the cloud function directly, this uses the backfill
    CLI code path to execute the cloud function's main method in parallel
    threads. Test loading separate partitions with WRITE_TRUNCATE: after both
    load jobs the count should equal the sum of the test data in both
    partitions despite having WRITE_TRUNCATE disposition, because the
    destination table should target only a particular partition with a
    decorator.
    """
    test_utils.check_blobs_exist(
        gcs_truncating_load_config,
        "the test is not configured correctly the load.json is missing")
    test_utils.check_blobs_exist(gcs_partitioned_data,
                                 "test data objects must exist")
    expected_num_rows = 0
    for part in [
            "$2017041101",
            "$2017041102",
    ]:
        test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                      "nyc_311", part, "nyc_311.csv")
        # Use a context manager so each partition file is closed
        # deterministically (the bare open() previously leaked the handle).
        with open(test_data_file) as data:
            expected_num_rows += sum(1 for _ in data)
    args = backfill.parse_args([
        f"--gcs-path=gs://{gcs_bucket.name}",
        "--mode=LOCAL",
    ])
    backfill.main(args)
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
def test_load_job(bq, gcs_data, dest_table):
    """tests basic single invocation with load job"""
    test_utils.check_blobs_exist(gcs_data, "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_data)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Count expected rows with the handle closed deterministically
    # (the bare open() previously leaked the file handle).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_duplicate_success_notification(bq, gcs_data, dest_table):
    """tests behavior with two notifications for the same success file.

    The second trigger simulates at-least-once redelivery of the same
    _SUCCESS object notification; ingestion must handle the duplicate so the
    final row count matches a single load of the test file.
    """
    test_utils.check_blobs_exist(gcs_data, "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_data)
    # BUG FIX: the docstring promises *two* notifications but the original
    # body only triggered once, so duplicate handling was never exercised.
    # Re-trigger with the same blobs to simulate the duplicate notification.
    test_utils.trigger_gcf_for_each_blob(gcs_data)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_backlog_subscriber_in_order_with_new_batch_after_exit(
        bq, gcs, gcs_bucket, dest_dataset, dest_ordered_update_table,
        gcs_ordered_update_data, gcs_external_update_config, gcs_backlog):
    """Test basic functionality of backlog subscriber.

    Populate a backlog with 3 files that make updates where we can assert
    that these jobs were applied in order.
    To ensure that the subscriber cleans up properly after itself before
    exit, we will drop a 4th batch after the subscriber has exited and
    assert that it gets applied as expected.
    """
    test_utils.check_blobs_exist(gcs_external_update_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_ordered_update_data,
                                 "test data objects must exist")
    for blob in gcs_external_update_config:
        basename = os.path.basename(blob.name)
        # Only perform the following actions for the backfill config file
        if basename == gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME:
            # Drain the pre-populated backlog (gcs_backlog fixture) to
            # completion in this process.
            _run_subscriber(gcs, bq, blob)
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, blob)
            # After a clean exit the _backlog/ directory and the _bqlock
            # mutex object must both have been removed.
            backlog_blobs = gcs_bucket.list_blobs(
                prefix=f"{table_prefix}/_backlog/")
            assert backlog_blobs.num_results == 0, "backlog is not empty"
            bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock")
            assert not bqlock_blob.exists(), "_bqlock was not cleaned up"
            rows = bq.query("SELECT alpha_update FROM "
                            f"{dest_ordered_update_table.dataset_id}"
                            f".{dest_ordered_update_table.table_id}")
            expected_num_rows = 1
            num_rows = 0
            for row in rows:
                num_rows += 1
                # "ABC" is only produced when batches 1..3 applied in order.
                assert row[
                    "alpha_update"] == "ABC", "backlog not applied in order"
            assert num_rows == expected_num_rows
            # Now we will test what happens when the publisher posts another
            # batch after the backlog subscriber has exited.
            backfill_blob = _post_a_new_batch(gcs_bucket,
                                              dest_ordered_update_table)
            assert backfill_blob is not None
            _run_subscriber(gcs, bq, backfill_blob)
            rows = bq.query("SELECT alpha_update FROM "
                            f"{dest_ordered_update_table.dataset_id}"
                            f".{dest_ordered_update_table.table_id}")
            expected_num_rows = 1
            num_rows = 0
            for row in rows:
                num_rows += 1
                # "ABCD" proves the 4th batch was applied by the restarted
                # subscriber.
                assert row[
                    "alpha_update"] == "ABCD", "new incremental not applied"
            assert num_rows == expected_num_rows
def test_external_query_with_bad_statement(gcs_data,
                                           gcs_external_config_bad_statement):
    """Verify that a broken transformation SQL statement surfaces as a
    BigQueryJobFailure when ingestion is triggered with bq_transform.sql
    and external.json in place.
    """
    test_utils.check_blobs_exist(gcs_external_config_bad_statement,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_data, "test data objects must exist")
    failure_exc = gcs_ocn_bq_ingest.common.exceptions.BigQueryJobFailure
    with pytest.raises(failure_exc):
        test_utils.trigger_gcf_for_each_blob(gcs_data)
def test_look_for_config_in_parents(bq, gcs_data_under_sub_dirs,
                                    gcs_external_config, dest_table):
    """test discovery of configuration files for external query in parent
    _config paths.
    """
    test_utils.check_blobs_exist(gcs_external_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_data_under_sub_dirs,
                                 "test data must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_data_under_sub_dirs)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_partitioned_parquet(bq, gcs_split_path_partitioned_parquet_data,
                             gcs_destination_parquet_config,
                             dest_partitioned_table):
    """Exercise the basic load-job ingestion path for parquet files."""
    test_utils.check_blobs_exist(gcs_destination_parquet_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_split_path_partitioned_parquet_data,
                                 "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(
        gcs_split_path_partitioned_parquet_data)
    # Fixture data is known to contain exactly 100 rows across partitions.
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, 100)
def test_gcf_event_schema(bq, gcs_data, dest_table):
    """tests compatibility to Cloud Functions Background Function posting the
    storage object schema
    https://cloud.google.com/storage/docs/json_api/v1/objects#resource
    directly based on object finalize.

    https://cloud.google.com/functions/docs/tutorials/storage#functions_tutorial_helloworld_storage-python
    """
    test_utils.check_blobs_exist(gcs_data, "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_data)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_load_job_appending_batches(bq, gcs_batched_data, dest_table):
    """tests two loading batches with the default load configuration.

    The total number of rows expected should be the number of rows
    in the test file multiplied by the number of batches because we
    should pick up the default WRITE_APPEND disposition.
    """
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        test_count = sum(1 for _ in data)
    expected_counts = 2 * test_count  # 2 batches * num of test rows
    test_utils.check_blobs_exist(gcs_batched_data,
                                 "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_batched_data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_counts)
def test_external_query_partitioned_parquet(
        bq, gcs_split_path_partitioned_parquet_data,
        gcs_external_partitioned_parquet_config, gcs_destination_config,
        dest_partitioned_table):
    """Run external-query ingestion over hive-split parquet data driven by
    bq_transform.sql and external.json.
    """
    config_blobs = (gcs_destination_config +
                    gcs_external_partitioned_parquet_config)
    test_utils.check_blobs_exist(config_blobs, "config objects must exist")
    test_utils.check_blobs_exist(gcs_split_path_partitioned_parquet_data,
                                 "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(
        gcs_split_path_partitioned_parquet_data)
    # Fixture data is known to contain exactly 100 rows across partitions.
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, 100)
def test_external_query_pure(
    bq,
    gcs_data,
    gcs_external_config,
    dest_table,
):
    """tests the basic external query ingestion mechanics with
    bq_transform.sql and external.json
    """
    test_utils.check_blobs_exist(gcs_data, "test data objects must exist")
    test_utils.check_blobs_exist(gcs_external_config,
                                 "config objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_data)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_backlog_publisher_with_existing_backfill_file(gcs, gcs_bucket,
                                                       dest_dataset,
                                                       dest_partitioned_table,
                                                       gcs_partitioned_data):
    """Test basic functionality of backlog_publisher when the backfill is
    already running. It should not repost this backfill file.
    """
    test_utils.check_blobs_exist(gcs_partitioned_data,
                                 "test data objects must exist")
    table_prefix = "/".join(
        [dest_dataset.dataset_id, dest_partitioned_table.table_id])
    # Pre-create the _BACKFILL object to simulate a subscriber that is
    # already running for this table prefix.
    backfill_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}"
    )
    backfill_blob.upload_from_string("")
    backfill_blob.reload()
    # Capture the GCS generation number: any repost/overwrite of the object
    # would bump it, which is exactly what we assert does NOT happen below.
    original_backfill_blob_generation = backfill_blob.generation
    table_prefix = ""
    # load each partition.
    for gcs_data in gcs_partitioned_data:
        if gcs_data.name.endswith(
                gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME):
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, gcs_data)
            gcs_ocn_bq_ingest.common.ordering.backlog_publisher(gcs, gcs_data)
    # Use of queue to test that list responses are returned in expected order.
    expected_backlog_blobs = queue.Queue()
    expected_backlog_blobs.put("/".join([
        table_prefix, "_backlog", "$2017041101",
        gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME
    ]))
    expected_backlog_blobs.put("/".join([
        table_prefix, "_backlog", "$2017041102",
        gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME
    ]))
    for backlog_blob in gcs_bucket.list_blobs(
            prefix=f"{table_prefix}/_backlog"):
        assert backlog_blob.name == expected_backlog_blobs.get(block=False)
    backfill_blob.reload()
    # Unchanged generation means the publisher did not repost _BACKFILL.
    assert backfill_blob.generation == original_backfill_blob_generation
def test_get_batches_for_gsurl_recursive(
    gcs,
    gcs_bucket,
    gcs_split_path_partitioned_parquet_data,
    gcs_external_partitioned_parquet_config,
):
    """Check that every data blob under the bucket root is discovered when
    batching recursively for a given prefix.
    """
    test_utils.check_blobs_exist(gcs_external_partitioned_parquet_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_split_path_partitioned_parquet_data,
                                 "test data objects must exist")
    batches = gcs_ocn_bq_ingest.common.utils.get_batches_for_gsurl(
        gcs, f"gs://{gcs_bucket.name}/", recursive=True)
    blob_count = 0
    for blob_batch in batches:
        print(blob_batch)
        blob_count += len(blob_batch)
    assert blob_count == 4
def test_look_for_destination_config_in_parents(
    bq,
    gcs_split_path_partitioned_data,
    gcs_destination_config,
    dest_partitioned_table,
):
    """test discovery of configuration files for destination in parent
    _config paths.
    """
    test_utils.check_blobs_exist(gcs_destination_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_split_path_partitioned_data,
                                 "test data must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_split_path_partitioned_data)
    expected_num_rows = 0
    for part in ["$2017041101", "$2017041102"]:
        test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                      "nyc_311", part, "nyc_311.csv")
        # Context manager closes each partition file (was previously leaked).
        with open(test_data_file) as data:
            expected_num_rows += sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
def test_load_job_partitioned(bq, gcs_partitioned_data,
                              gcs_truncating_load_config,
                              dest_partitioned_table):
    """Test loading separate partitions with WRITE_TRUNCATE.

    After both load jobs the count should equal the sum of the test data in
    both partitions despite having WRITE_TRUNCATE disposition because the
    destination table should target only a particular partition with a
    decorator.
    """
    test_utils.check_blobs_exist(gcs_truncating_load_config,
                                 "the load.json is missing")
    test_utils.check_blobs_exist(gcs_partitioned_data,
                                 "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_partitioned_data)
    expected_num_rows = 0
    for part in ["$2017041101", "$2017041102"]:
        test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                      "nyc_311", part, "nyc_311.csv")
        # Context manager closes each partition file (was previously leaked).
        with open(test_data_file) as data:
            expected_num_rows += sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
def test_ordered_load_parquet_hive_partitioning(
        monkeypatch, gcs, bq, gcs_bucket,
        gcs_destination_parquet_config_hive_partitioned,
        gcs_external_hive_partitioned_parquet_config,
        gcs_split_path_partitioned_parquet_data, dest_hive_partitioned_table):
    """Test ordered loads of parquet data files

    Set global env variable ORDER_PER_TABLE so that all loads are ordered.
    Test to make sure that parquet data files are loaded in order.
    """
    monkeypatch.setenv("ORDER_PER_TABLE", "True")
    monkeypatch.setenv("START_BACKFILL_FILENAME", "_HISTORYDONE")
    # Must reload the constants file in order to pick up testing mock env vars
    importlib.reload(gcs_ocn_bq_ingest.common.constants)
    test_utils.check_blobs_exist(gcs_split_path_partitioned_parquet_data,
                                 "test data objects must exist")
    # Derive the table prefix from the first _SUCCESS blob we find.
    table_prefix = ""
    for gcs_data in gcs_split_path_partitioned_parquet_data:
        if gcs_data.name.endswith(
                gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME):
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, gcs_data)
            break
    # Invoke cloud function for all data blobs and _SUCCESS blob.
    # Cloud function shouldn't take any action at this point because there is
    # no _HISTORYDONE file yet.
    test_utils.trigger_gcf_for_each_blob(
        gcs_split_path_partitioned_parquet_data)
    # Upload _HISTORYDONE file which will cause cloud function to take action
    backfill_start_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/"
        f"{gcs_ocn_bq_ingest.common.constants.START_BACKFILL_FILENAME}")
    backfill_start_blob.upload_from_string("")
    test_utils.check_blobs_exist([backfill_start_blob], "_HISTORYDONE file was"
                                 "not created.")
    test_utils.trigger_gcf_for_each_blob([backfill_start_blob])
    # Check to make sure _BACKFILL file has been created
    backfill_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}"
    )
    test_utils.check_blobs_exist([backfill_blob],
                                 "_BACKFILL file was not created by method"
                                 "start_backfill_subscriber_if_not_running")
    test_utils.trigger_gcf_for_each_blob([backfill_blob])
    expected_num_rows = 100
    test_utils.bq_wait_for_rows(bq, dest_hive_partitioned_table,
                                expected_num_rows)
    # Check to make sure the hive_part_column column values were correctly
    # inserted into the BigQuery destination table.
    for row in bq.query(
            f"SELECT DISTINCT hive_part_column "
            f"FROM `{dest_hive_partitioned_table.full_table_id.replace(':','.')}`"
    ).result():
        assert row.hive_part_column == 9999
def test_external_query_partitioned_with_destination_config(
        bq, gcs_partitioned_data, gcs_external_partitioned_config,
        gcs_destination_config, dest_partitioned_table):
    """tests the basic external query ingestion mechanics with
    bq_transform.sql, external.json, and destination config in load.json.
    """
    test_utils.check_blobs_exist(
        (gcs_external_partitioned_config + gcs_destination_config),
        "config objects must exist")
    test_utils.check_blobs_exist(gcs_partitioned_data, "test data must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_partitioned_data +
                                         gcs_external_partitioned_config +
                                         gcs_destination_config)
    expected_num_rows = 0
    for part in [
            "$2017041101",
            "$2017041102",
    ]:
        test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                      "nyc_311", part, "nyc_311.csv")
        # Context manager closes each partition file (was previously leaked).
        with open(test_data_file) as data:
            expected_num_rows += sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
def test_load_job_truncating_batches(
    bq,
    gcs_batched_data,
    gcs_truncating_load_config,
    dest_table,
):
    """tests two successive batches with a load.json that dictates
    WRITE_TRUNCATE.

    after both load jobs the count should be the same as the number of lines
    in the test file because we should pick up the WRITE_TRUNCATE disposition.
    """
    test_utils.check_blobs_exist(
        gcs_truncating_load_config,
        "the test is not configured correctly the load.json is missing")
    test_utils.check_blobs_exist(gcs_batched_data,
                                 "test data objects must exist")
    test_utils.trigger_gcf_for_each_blob(gcs_batched_data)
    test_data_file = os.path.join(TEST_DIR, "resources", "test-data",
                                  "nation", "part-m-00001")
    # Close the file handle deterministically (was previously leaked).
    with open(test_data_file) as data:
        expected_num_rows = sum(1 for _ in data)
    test_utils.bq_wait_for_rows(bq, dest_table, expected_num_rows)
def test_backlog_publisher(gcs, gcs_bucket, gcs_partitioned_data):
    """Test basic functionality of backlog_publisher

    Drop two success files. Assert that both success files are added to
    backlog and backfill file created. Assert that that only one backfill
    file is not recreated.
    """
    test_utils.check_blobs_exist(gcs_partitioned_data,
                                 "test data objects must exist")
    success_filename = gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME
    table_prefix = ""
    # Publish each partition's _SUCCESS blob to the backlog, capturing the
    # table prefix from the first one we see.
    for data_blob in gcs_partitioned_data:
        if data_blob.name.endswith(success_filename):
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, data_blob)
            gcs_ocn_bq_ingest.common.ordering.backlog_publisher(
                gcs, data_blob)
    # A FIFO queue verifies list responses come back in the expected order.
    expected_backlog_blobs = queue.Queue()
    for partition in ("$2017041101", "$2017041102"):
        expected_backlog_blobs.put(
            f"{table_prefix}/_backlog/{partition}/{success_filename}")
    for backlog_blob in gcs_bucket.list_blobs(
            prefix=f"{table_prefix}/_backlog"):
        assert backlog_blob.name == expected_backlog_blobs.get(block=False)
    backfill_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}"
    )
    assert backfill_blob.exists()
def test_ordered_load_parquet_wait_for_validation(
        monkeypatch, gcs, bq, gcs_bucket, gcs_destination_parquet_config,
        gcs_external_partitioned_parquet_config,
        gcs_split_path_partitioned_parquet_data, dest_partitioned_table):
    """Test ordered loads of parquet data files with a validation step
    between each load.

    Set global env variable ORDER_PER_TABLE so that all loads are ordered.
    Test to make sure that parquet data files are loaded in order.
    """
    monkeypatch.setenv("ORDER_PER_TABLE", "True")
    monkeypatch.setenv("START_BACKFILL_FILENAME", "_HISTORYDONE")
    monkeypatch.setenv("WAIT_FOR_VALIDATION", "True")
    # Must reload the constants file in order to pick up testing mock env vars
    importlib.reload(gcs_ocn_bq_ingest.common.constants)
    test_utils.check_blobs_exist(gcs_split_path_partitioned_parquet_data,
                                 "test data objects must exist")
    # Derive the table prefix from the first _SUCCESS blob we find.
    table_prefix = ""
    for gcs_data in gcs_split_path_partitioned_parquet_data:
        if gcs_data.name.endswith(
                gcs_ocn_bq_ingest.common.constants.SUCCESS_FILENAME):
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, gcs_data)
            break
    # Upload _HISTORYDONE file which will cause cloud function to take action
    backfill_start_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/"
        f"{gcs_ocn_bq_ingest.common.constants.START_BACKFILL_FILENAME}")
    backfill_start_blob.upload_from_string("")
    test_utils.check_blobs_exist([backfill_start_blob], "_HISTORYDONE file was"
                                 "not created.")
    test_utils.trigger_gcf_for_each_blob([backfill_start_blob])
    # Invoke cloud function for all data blobs and _SUCCESS blob.
    # Cloud function shouldn't take any action at this point because there is
    # no _HISTORYDONE file yet.
    test_utils.trigger_gcf_for_each_blob(
        gcs_split_path_partitioned_parquet_data)
    # Check to make sure _BACKFILL file has been created
    backfill_blob: storage.Blob = gcs_bucket.blob(
        f"{table_prefix}/{gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME}"
    )
    test_utils.check_blobs_exist([backfill_blob],
                                 "_BACKFILL file was not created by method"
                                 "start_backfill_subscriber_if_not_running")
    test_utils.trigger_gcf_for_each_blob([backfill_blob])
    # Test to make sure that _bqlock is not present since cloud function should
    # remove the lock in between validations
    with pytest.raises(NotFound):
        test_utils.check_blobs_exist(
            [gcs_bucket.blob(f"{table_prefix}/_bqlock")])
    # Check that the first batch of data was loaded but only the first batch,
    # since the second batch is waiting on confirmation of validation.
    expected_num_rows = 50
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
    # Upload _BACKFILL file to signal that validation has completed and
    # that the next item in the _backlog can be processed.
    backfill_blob.upload_from_string("")
    test_utils.trigger_gcf_for_each_blob([backfill_blob])
    # Check that the second batch was loaded
    expected_num_rows = 100
    test_utils.bq_wait_for_rows(bq, dest_partitioned_table, expected_num_rows)
    # Upload _BACKFILL file to signal that validation has completed.
    # There won't be another chunk to load so this _BACKFILL file
    # should signal the cloud function to remove _BACKFILL file
    # and backlog directory.
    backfill_blob.upload_from_string("")
    test_utils.trigger_gcf_for_each_blob([backfill_blob])
    # Test to make sure that _BACKFILL file is not present since cloud
    # function should remove the _BACKFILL file after final load/validation
    # is complete.
    with pytest.raises(NotFound):
        test_utils.check_blobs_exist(
            [gcs_bucket.blob(f"{table_prefix}/_BACKFILL")])
def test_backlog_subscriber_in_order_with_new_batch_while_running(
        bq, gcs, gcs_bucket, dest_ordered_update_table: bigquery.Table,
        gcs_ordered_update_data: List[storage.Blob],
        gcs_external_update_config: List[storage.Blob],
        gcs_backlog: List[storage.Blob]):
    """Test functionality of backlog subscriber when new batches are added
    before the subscriber is done finishing the existing backlog.

    Populate a backlog with 3 files that make updates where we can assert
    that these jobs were applied in order. In another process populate a
    fourth batch, and call the publisher.
    """
    test_utils.check_blobs_exist(gcs_external_update_config,
                                 "config objects must exist")
    test_utils.check_blobs_exist(gcs_ordered_update_data,
                                 "test data objects must exist")
    # Cannot pickle clients to another process so we need to recreate some
    # objects without the client property.
    for blob in gcs_external_update_config:
        basename = os.path.basename(blob.name)
        # Only perform the following actions for the backfill config file
        if basename == gcs_ocn_bq_ingest.common.constants.BACKFILL_FILENAME:
            # Client-less copies of the blob/bucket that can cross the
            # process boundary.
            backfill_blob = storage.Blob.from_string(
                f"gs://{blob.bucket.name}/"
                f"{blob.name}")
            bkt = storage.Bucket(None, gcs_bucket.name)
            # The subscriber renames the backfill file to a _claimed_ name;
            # its appearance signals the subscriber loop has started.
            claim_blob: storage.Blob = blob.bucket.blob(
                blob.name.replace(
                    basename, f"_claimed_{basename}_created_at_"
                    f"{blob.time_created.timestamp()}"))
            # Run subscriber w/ backlog and publisher w/ new batch in parallel.
            with multiprocessing.Pool(processes=3) as pool:
                res_subscriber = pool.apply_async(_run_subscriber,
                                                  (None, None, backfill_blob))
                # wait for existence of claim blob
                # to ensure subscriber is running.
                # NOTE(review): this is a tight busy-wait that hammers the
                # GCS exists() API; a short sleep per iteration would be
                # kinder — left as-is to preserve behavior.
                while not claim_blob.exists():
                    pass
                res_backlog_publisher = pool.apply_async(
                    _post_a_new_batch, (bkt, dest_ordered_update_table))
                res_backlog_publisher.wait()
                # The monitor checks for the race where the subscriber
                # exited between the new batch being posted and now.
                res_monitor = pool.apply_async(
                    gcs_ocn_bq_ingest.common.ordering.subscriber_monitor,
                    (None, bkt,
                     storage.Blob(
                         f"{dest_ordered_update_table.project}"
                         f".{dest_ordered_update_table.dataset_id}/"
                         f"{dest_ordered_update_table.table_id}/"
                         f"_backlog/04/_SUCCESS", bkt)))
                if res_monitor.get():
                    print(
                        "subscriber monitor had to retrigger subscriber loop")
                    backfill_blob.reload(client=gcs)
                    _run_subscriber(None, None, backfill_blob)
                res_subscriber.wait()
            table_prefix = gcs_ocn_bq_ingest.common.utils.get_table_prefix(
                gcs, blob)
            # After completion the backlog and the _bqlock mutex must be gone.
            backlog_blobs = gcs_bucket.list_blobs(prefix=f"{table_prefix}/"
                                                  f"_backlog/")
            assert backlog_blobs.num_results == 0, "backlog is not empty"
            bqlock_blob: storage.Blob = gcs_bucket.blob("_bqlock")
            assert not bqlock_blob.exists(), "_bqlock was not cleaned up"
            rows = bq.query("SELECT alpha_update FROM "
                            f"{dest_ordered_update_table.dataset_id}"
                            f".{dest_ordered_update_table.table_id}")
            expected_num_rows = 1
            num_rows = 0
            for row in rows:
                num_rows += 1
                # "ABCD" is only produced when all 4 batches applied in order.
                assert row[
                    "alpha_update"] == "ABCD", "backlog not applied in order"
            assert num_rows == expected_num_rows