def export_table(table_name: str, cloud_sql_to_bq_config: CloudSqlToBQConfig) -> bool:
    """Export a Cloud SQL table to a CSV file on GCS.

    Given a table name and export_query, retrieve the export URI from
    cloud_sql_to_bq_config, then execute the export operation and wait until it
    completes.

    Args:
        table_name: Table to export.
        cloud_sql_to_bq_config: The export config class for the table's SchemaType.
    Returns:
        True if operation succeeded without errors, False if not.
    """
    schema_type = cloud_sql_to_bq_config.schema_type
    export_query = cloud_sql_to_bq_config.get_table_export_query(table_name)
    export_uri = cloud_sql_to_bq_config.get_gcs_export_uri_for_table(table_name)
    export_context = create_export_context(schema_type, export_uri, export_query)

    project_id = metadata.project_id()
    instance_id = SQLAlchemyEngineManager.get_stripped_cloudsql_instance_id(schema_type)
    export_request = (
        sqladmin_client()
        .instances()
        .export(project=project_id, instance=instance_id, body=export_context)
    )

    logging.info("GCS URI [%s] in project [%s]", export_uri, project_id)
    logging.info("Starting export: [%s]", str(export_request.to_json()))

    try:
        response = export_request.execute()
    except googleapiclient.errors.HttpError:
        logging.exception("Failed to export table [%s]", table_name)
        return False

    # We need to block until the operation is done because
    # the Cloud SQL API only supports one operation at a time.
    operation_id = response["name"]
    logging.info(
        "Waiting for export operation [%s] to complete for table [%s] "
        "in database [%s] in project [%s]",
        operation_id,
        table_name,
        instance_id,
        project_id,
    )
    operation_success = wait_until_operation_finished(operation_id)
    return operation_success
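# A minimal usage sketch: driving export_table for every table in a schema's
# export config. CloudSqlToBQConfig.for_schema_type and get_tables_to_export
# are assumed helper names for illustration, not confirmed by this module.
def _export_all_tables_sketch(schema_type: SchemaType) -> None:
    config = CloudSqlToBQConfig.for_schema_type(schema_type)  # hypothetical constructor
    for table in config.get_tables_to_export():  # hypothetical accessor
        if not export_table(table.name, config):
            logging.error("Export failed for table [%s]", table.name)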
def _throw_if_error(project_id: str, operation_id: str, operation_type: str) -> None:
    """Raise a RuntimeError if the given backup operation finished with errors."""
    operation = (sqladmin_client().operations().get(
        project=project_id, operation=operation_id).execute())
    if "error" in operation:
        errors = operation["error"].get("errors", [])
        error_messages = [
            "code: {}\n message: {}".format(error["code"], error["message"])
            for error in errors
        ]
        raise RuntimeError("Backup {} operation finished with "
                           "{} errors:\n{}".format(operation_type,
                                                   str(len(errors)),
                                                   "\n".join(error_messages)))
def _await_operation(project_id: str, operation_id: str) -> None:
    """Poll the given Cloud SQL operation until it completes."""
    done = False
    while not done:
        operation = (sqladmin_client().operations().get(
            project=project_id, operation=operation_id).execute())
        current_status = operation["status"]

        if current_status in {"PENDING", "RUNNING", "UNKNOWN"}:
            time.sleep(_SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
        elif current_status == "DONE":
            done = True
        else:
            raise RuntimeError(
                "Unrecognized operation status: {}".format(current_status))
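# A bounded variant of the polling loop above, for callers that would rather
# fail fast than wait indefinitely. The timeout parameter and TimeoutError
# behavior are assumptions of this sketch; the helper above polls forever.
def _await_operation_with_timeout(project_id: str, operation_id: str,
                                  timeout_seconds: int = 3600) -> None:
    deadline = time.time() + timeout_seconds
    while time.time() < deadline:
        operation = (sqladmin_client().operations().get(
            project=project_id, operation=operation_id).execute())
        if operation["status"] == "DONE":
            return
        time.sleep(_SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
    raise TimeoutError(
        "Operation [{}] did not finish within [{}] seconds".format(
            operation_id, timeout_seconds))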
def wait_until_operation_finished(operation_id: str) -> bool:
    """Monitor a Cloud SQL operation's progress and wait until it completes.

    We must wait until completion because only one Cloud SQL operation can run
    at a time.

    Args:
        operation_id: Cloud SQL Operation ID.
    Returns:
        True if operation succeeded without errors, False if not.

    See here for details:
    https://cloud.google.com/sql/docs/postgres/admin-api/v1beta4/operations/get
    """
    operation_in_progress = True
    operation_success = False

    while operation_in_progress:
        get_operation = sqladmin_client().operations().get(
            project=metadata.project_id(), operation=operation_id)
        operation = get_operation.execute()
        operation_status = operation["status"]

        if operation_status in {"PENDING", "RUNNING", "UNKNOWN"}:
            time.sleep(SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
        elif operation_status == "DONE":
            operation_in_progress = False

        logging.debug("Operation [%s] status: [%s]", operation_id, operation_status)

    if "error" in operation:
        errors = operation["error"].get("errors", [])
        for error in errors:
            logging.error(
                "Operation %s finished with error: %s, %s\n%s",
                operation_id,
                error.get("kind"),
                error.get("code"),
                error.get("message"))
    else:
        logging.info("Operation [%s] succeeded.", operation_id)
        operation_success = True

    return operation_success
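# For reference, a failed operation comes back from operations().get() shaped
# roughly like the dict below; the field values are illustrative, not real
# output. The kind/code/message fields read above nest under error.errors.
_EXAMPLE_FAILED_OPERATION = {
    "name": "some-operation-id",
    "status": "DONE",
    "error": {
        "errors": [
            {
                "kind": "sql#operationError",
                "code": "UNKNOWN",
                "message": "Illustrative error message",
            }
        ]
    },
}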
def get_operation_with_retries(operation_id: str) -> Dict[str, Any]:
    """Fetch a Cloud SQL operation by id, retrying on 404s while it starts up."""
    num_retries = 3
    while num_retries > 0:
        # We need to guard here for possible 404 HttpErrors if the operation
        # hasn't started yet.
        try:
            get_operation = sqladmin_client().operations().get(
                project=metadata.project_id(), operation=operation_id)
            return get_operation.execute()
        except googleapiclient.errors.HttpError as error:
            # If we get a 404 HttpError, wait a few seconds and then retry
            # getting the operation instance.
            if error.resp.status == HTTPStatus.NOT_FOUND and num_retries > 0:
                logging.debug(
                    "HttpError when requesting operation_id [%s]. Retrying request: %s",
                    operation_id,
                    num_retries)
                time.sleep(SECONDS_BETWEEN_OPERATION_STATUS_CHECKS)
                num_retries -= 1
            else:
                raise
    raise ValueError("Operation not set, request for the operation failed.")
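# wait_until_operation_finished above fetches the operation with a bare
# operations().get().execute(), which can raise a 404 before the operation
# record becomes visible. A sketch of the same status check routed through
# get_operation_with_retries instead (an assumption, not the module's wiring):
def _get_operation_status_with_retries(operation_id: str) -> str:
    operation = get_operation_with_retries(operation_id)
    return operation["status"]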
def update_long_term_backups_for_cloudsql_instance(project_id: str,
                                                   instance_id: str) -> None:
    """Create a new manual backup for the given sqlalchemy instance and delete
    manual backups for that instance that are older than _MAX_BACKUP_AGE_DAYS.
    """
    logging.info("Creating request for backup insert operation on [%s]", instance_id)
    insert_request = (sqladmin_client().backupRuns().insert(
        project=project_id, instance=instance_id, body={}))

    logging.info("Beginning backup insert operation on [%s]", instance_id)
    insert_operation = insert_request.execute()
    _await_operation(project_id, insert_operation["name"])
    _throw_if_error(project_id, insert_operation["name"], "insert")
    logging.info("Backup insert operation on [%s] completed", instance_id)

    logging.info("Creating request for backup list operation on [%s]", instance_id)
    list_request = (sqladmin_client().backupRuns().list(
        project=project_id, instance=instance_id))

    logging.info("Beginning backup list request")
    list_result = list_request.execute()
    backup_runs = list_result["items"]
    manual_backup_runs = [
        backup_run for backup_run in backup_runs
        if backup_run["type"] == "ON_DEMAND"
    ]
    logging.info(
        "Backup list request for [%s] completed with [%s] total backup"
        " runs and [%s] manual backup runs",
        instance_id,
        str(len(backup_runs)),
        str(len(manual_backup_runs)),
    )

    # startTime is a string with format yyyy-mm-dd, so sorting it as a
    # string will give the same result as converting it to a date and then
    # sorting by date
    manual_backup_runs.sort(key=lambda backup_run: backup_run["startTime"])

    six_months_ago_datetime = datetime.datetime.now(
        tz=pytz.UTC) - datetime.timedelta(days=_MAX_BACKUP_AGE_DAYS)
    six_months_ago_date_str = six_months_ago_datetime.date().isoformat()

    for backup_run in manual_backup_runs:
        backup_start_date_str = backup_run["startTime"]
        if backup_start_date_str > six_months_ago_date_str:
            break

        backup_id = backup_run["id"]
        logging.info(
            "Creating request for backup delete operation for backup "
            "[%s] of [%s]",
            backup_id,
            instance_id,
        )
        delete_request = (sqladmin_client().backupRuns().delete(
            project=project_id, instance=instance_id, id=backup_id))

        logging.info(
            "Beginning backup delete operation for backup [%s] of [%s]",
            backup_id,
            instance_id,
        )
        delete_operation = delete_request.execute()
        _await_operation(project_id, delete_operation["name"])
        _throw_if_error(project_id, delete_operation["name"], "delete")
        logging.info(
            "Backup delete operation completed for backup [%s] of [%s]",
            backup_id,
            instance_id,
        )
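# A minimal sketch of a driver that refreshes long-term backups for every
# Cloud SQL instance. get_all_stripped_cloudsql_instance_ids is a hypothetical
# accessor; substitute however the codebase enumerates instance ids.
def _update_all_long_term_backups_sketch() -> None:
    project_id = metadata.project_id()
    for instance_id in SQLAlchemyEngineManager.get_all_stripped_cloudsql_instance_ids():  # hypothetical
        update_long_term_backups_for_cloudsql_instance(project_id, instance_id)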