def __delete_data_process(bq_client, dataset, condition, table_name_list):
    """Rebuild the listed tables of ``dataset`` from their remaining data.

    The work happens in four steps:

    1. create a temporary dataset;
    2. for every table, launch a job that writes the data to keep into a
       table of the same name in the temporary dataset;
    3. once all jobs have finished, delete the original tables;
    4. copy the tables from the temporary dataset back into ``dataset``.

    NOTE(review): nothing here checks the temporary tables before the
    originals are deleted (see the TODO below).

    Args:
        bq_client: client handle forwarded to every helper call.
        dataset: name of the dataset whose tables are rewritten.
        condition: forwarded unchanged to ``__query_out_remain_data``;
            presumably the filter that decides which rows go or stay --
            confirm against that helper.
        table_name_list: names of the tables to process.
    """
    # Staging area that receives one table per original table.
    temp_dataset_name = __create_temp_dataset(bq_client)

    print("Create tables with data remain.")
    # Maps each launched job id to the table it is building.
    pending_jobs = {}
    for package in utils.get_table_packages(table_name_list):
        for name in package:
            launched_job_id = __query_out_remain_data(
                bq_client, name, condition, dataset, temp_dataset_name)
            pending_jobs[launched_job_id] = name
        # Called once per package of tables; presumably limits how many
        # jobs run at the same time -- confirm in utils.
        utils.thruhold_jobs(bq_client)

    # Block until every temp-table job is done before touching the originals.
    print("Wait for all temp tables are created.")
    job_ids = pending_jobs.keys()
    utils.wait_all_job_finish(bq_client, job_ids)

    # TODO: validate the temp data

    # Drop the originals; confirmation is skipped for this call.
    print("Delete origin tables.")
    table_delete_handler._delete_table_list(
        bq_client, dataset, table_name_list, ignore_confirm=True)

    # Bring the kept data back under the original table names.
    print("Copy the remaining data.")
    table_copy_handler._copy_table_list(
        bq_client, temp_dataset_name, dataset, table_name_list,
        ignore_confirm=True)
def _copy_table_list(
        bq_client, org_dataset, dest_dataset, table_name_list,
        ignore_confirm=False):
    """Copy the listed tables from one dataset to another, same names.

    Unless ``ignore_confirm`` is true, the table names and both dataset
    names are printed and the user is asked to confirm on stdin. The
    prompt is ``[y/N]``: ``y``/``yes`` proceeds; ``n``/``no`` or an empty
    answer (just pressing Enter) aborts without copying anything; any
    other answer is asked again. Answers are case-insensitive and
    surrounding whitespace is ignored.

    Args:
        bq_client: client handle forwarded to the helpers.
        org_dataset: name of the dataset to copy from.
        dest_dataset: name of the dataset to copy to.
        table_name_list: names of the tables to copy.
        ignore_confirm: when true, skip the interactive confirmation.

    Returns:
        None, both after launching the copies and when the user aborts.
    """
    # ask for confirmation
    if not ignore_confirm:
        print("")
        for table_name in table_name_list:
            print(table_name)
        print("")
        print("The [%d] tables above is going to be copied."
              % len(table_name_list))
        print("From dataset: [%s]" % org_dataset)
        print("To dataset: [%s]" % dest_dataset)
        print("")
        print("Is it ok? [y/N]")

        proceed_choices = ('yes', 'y')
        # The capital N in the prompt advertises "no" as the default, so an
        # empty answer is treated as a refusal instead of being rejected.
        abort_choices = ('no', 'n', '')
        while True:
            # strip() so that answers such as "y " are still recognised.
            choice = raw_input().strip().lower()
            if choice in proceed_choices:
                break
            if choice in abort_choices:
                return
            print("Please enter [y or n]")

    # get launch packages
    for sub_table_name_list in utils.get_table_packages(table_name_list):
        # check running jobs before launching the next package of copies
        utils.thruhold_jobs(bq_client)
        for table_name in sub_table_name_list:
            # Source and destination table share the same name.
            __copy_table(
                bq_client, org_dataset, dest_dataset,
                table_name, table_name)